diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 6802b4d..4cccb1a 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -500,6 +500,7 @@ HIVEMERGEMAPFILES("hive.merge.mapfiles", true), HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false), + HIVEMERGETEZFILES("hive.merge.tezfiles", false), HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000)), HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000)), HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true), @@ -536,6 +537,8 @@ HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10), HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false), + HIVE_ORC_ZEROCOPY("hive.exec.orc.zerocopy", false), + HIVESKEWJOIN("hive.optimize.skewjoin", false), HIVECONVERTJOIN("hive.auto.convert.join", true), HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true), @@ -562,6 +565,10 @@ HIVEDEBUGLOCALTASK("hive.debug.localtask",false), HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"), + HIVETEZINPUTFORMAT("hive.tez.input.format", "org.apache.hadoop.hive.ql.io.HiveInputFormat"), + + HIVETEZCONTAINERSIZE("hive.tez.container.size", -1), + HIVETEZJAVAOPTS("hive.tez.java.opts", null), HIVEENFORCEBUCKETING("hive.enforce.bucketing", false), HIVEENFORCESORTING("hive.enforce.sorting", false), @@ -903,6 +910,9 @@ // Whether to generate the splits locally or in the AM (tez only) HIVE_AM_SPLIT_GENERATION("hive.compute.splits.in.am", true), + HIVE_PREWARM_ENABLED("hive.prewarm.enabled", false), + HIVE_PREWARM_NUM_CONTAINERS("hive.prewarm.numcontainers", 10), + // none, idonly, traverse, execution HIVESTAGEIDREARRANGE("hive.stageid.rearrange", "none"), HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES("hive.explain.dependency.append.tasktype", false), diff --git conf/hive-default.xml.template conf/hive-default.xml.template index 0b86b9c..3f01e0b 100644 --- conf/hive-default.xml.template +++ conf/hive-default.xml.template @@ -794,6 +794,12 @@ + hive.merge.tezfiles + false + Merge small files at the end of a Tez DAG + + + hive.heartbeat.interval 1000 Send a heartbeat after this interval - used by mapjoin and filter operators @@ -960,6 +966,12 @@ + hive.tez.input.format + org.apache.hadoop.hive.ql.io.HiveInputFormat + The default input format for tez. Tez groups splits in the AM. + + + hive.udtf.auto.progress false Whether Hive should automatically send progress information to TaskTracker when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious because this may prevent TaskTracker from killing tasks with infinite loops. @@ -2222,6 +2234,22 @@ + hive.prewarm.enabled + false + + Enables container prewarm for tez (hadoop 2 only) + + + + + hive.prewarm.numcontainers + 10 + + Controls the number of containers to prewarm for tez (hadoop 2 only) + + + + hive.server2.table.type.mapping CLASSIC @@ -2340,6 +2368,14 @@ + hive.exec.orc.zerocopy + false + + Use zerocopy reads with ORC. + + + + hive.jar.directory hdfs:///user/hive/ @@ -2358,4 +2394,16 @@ + + hive.tez.container.size + -1 + By default tez will spawn containers of the size of a mapper. This can be used to override it. + + + + hive.tez.java.opts + + By default tez will use the java opts from map tasks. This can be used to override it. 
+ + diff --git data/conf/tez/hive-site.xml data/conf/tez/hive-site.xml index 1af4495..5ff5b4c 100644 --- data/conf/tez/hive-site.xml +++ data/conf/tez/hive-site.xml @@ -33,8 +33,26 @@ - mapred.child.java.opts - -Xmx200m + mapred.tez.java.opts + -Xmx128m + + + + hive.tez.container.size + 128 + + + + + hive.merge.tezfiles + false + Merge small files at the end of a Tez DAG + + + + hive.tez.input.format + org.apache.hadoop.hive.ql.io.HiveInputFormat + The default input format for tez. Tez groups splits in the AM. @@ -172,7 +190,7 @@ hive.input.format - org.apache.hadoop.hive.ql.io.HiveInputFormat + org.apache.hadoop.hive.ql.io.CombineHiveInputFormat The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. @@ -194,4 +212,20 @@ Whether to use MR or Tez + + hive.prewarm.enabled + true + + Enables container prewarm for tez (hadoop 2 only) + + + + + hive.prewarm.numcontainers + 3 + + Controls the number of containers to prewarm for tez (hadoop 2 only) + + + diff --git itests/qtest/pom.xml itests/qtest/pom.xml index e70da0f..cd871ff 100644 --- itests/qtest/pom.xml +++ itests/qtest/pom.xml @@ -39,7 +39,7 @@ stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q - join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q + 
join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf
_xpath.q,udtf_stack.q,view.q,virtual_column.q @@ -273,6 +273,12 @@ tests + org.apache.hadoop + hadoop-yarn-client + ${hadoop-23.version} + test + + org.apache.hbase hbase-common ${hbase.hadoop2.version} diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index b192aaf..96868eb 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -1264,6 +1264,7 @@ private void maskPatterns(Pattern[] patterns, String fname) throws Exception { ".*job_local[0-9_]*.*", ".*USING 'java -cp.*", "^Deleted.*", + ".*DagName:.*", ".*Input:.*/data/files/.*", ".*Output:.*/data/files/.*" }); diff --git pom.xml pom.xml index a09b01d..46b10a0 100644 --- pom.xml +++ pom.xml @@ -94,11 +94,11 @@ 3.1 1.1.3 10.10.1.1 - 11.0.2 + 15.0 2.1.6 0.20.2 1.2.1 - 2.2.0 + 2.3.0 0.96.0-hadoop1 0.96.0-hadoop2 @@ -134,7 +134,7 @@ 1.0.1 1.7.5 4.0.4 - 0.2.0 + 0.3.0-incubating-SNAPSHOT 1.1 0.2 1.4 @@ -188,6 +188,17 @@ false + + apache.snapshots + Apache Snapshot Repository + http://repository.apache.org/snapshots + + false + + + true + + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index fe929fc..35f4fa9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -154,17 +154,24 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E public JSONObject getJSONPlan(PrintStream out, ExplainWork work) throws Exception { + return getJSONPlan(out, work.getAstStringTree(), work.getRootTasks(), work.getFetchTask(), + work.isFormatted(), work.getExtended(), work.isAppendTaskType()); + } + + public JSONObject getJSONPlan(PrintStream out, String ast, List> tasks, Task fetchTask, + boolean jsonOutput, boolean isExtended, boolean appendTaskType) throws Exception { + // If the user asked for a formatted output, dump the json output // in the output stream JSONObject outJSONObject = new JSONObject(); - boolean jsonOutput = work.isFormatted(); + if (jsonOutput) { out = null; } // Print out the parse AST - if (work.getAstStringTree() != null && work.getExtended()) { - String jsonAST = outputAST(work.getAstStringTree(), out, jsonOutput, 0); + if (ast != null && isExtended) { + String jsonAST = outputAST(ast, out, jsonOutput, 0); if (out != null) { out.println(); } @@ -173,16 +180,15 @@ public JSONObject getJSONPlan(PrintStream out, ExplainWork work) outJSONObject.put("ABSTRACT SYNTAX TREE", jsonAST); } } - List> tasks = work.getRootTasks(); List ordered = StageIDsRearranger.getExplainOrder(conf, tasks); - Task fetchTask = work.getFetchTask(); + if (fetchTask != null) { fetchTask.setRootTask(true); // todo HIVE-3925 ordered.add(fetchTask); } - JSONObject jsonDependencies = outputDependencies(out, work, ordered); + JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered); if (out != null) { out.println(); @@ -193,7 +199,8 @@ public JSONObject getJSONPlan(PrintStream out, ExplainWork work) } // Go over all the tasks and dump out the plans - JSONObject jsonPlan = outputStagePlans(out, work, ordered); + JSONObject jsonPlan = outputStagePlans(out, ordered, + jsonOutput, isExtended); if (jsonOutput) { outJSONObject.put("STAGE PLANS", jsonPlan); @@ -748,10 +755,10 @@ public String outputAST(String treeString, PrintStream out, return jsonOutput ? 
treeString : null; } - public JSONObject outputDependencies(PrintStream out, ExplainWork work, List tasks) + public JSONObject outputDependencies(PrintStream out, boolean jsonOutput, + boolean appendTaskType, List tasks) throws Exception { - boolean jsonOutput = work.isFormatted(); - boolean appendTaskType = work.isAppendTaskType(); + if (out != null) { out.println("STAGE DEPENDENCIES:"); } @@ -767,16 +774,17 @@ public JSONObject outputDependencies(PrintStream out, ExplainWork work, List tasks) + public JSONObject outputStagePlans(PrintStream out, List tasks, + boolean jsonOutput, boolean isExtended) throws Exception { - boolean jsonOutput = work.isFormatted(); + if (out != null) { out.println("STAGE PLANS:"); } JSONObject json = jsonOutput ? new JSONObject() : null; for (Task task : tasks) { - outputPlan(task, out, json, work.getExtended(), jsonOutput, 2); + outputPlan(task, out, json, isExtended, jsonOutput, 2); } return jsonOutput ? json : null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index cc840be..23ef69b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -777,6 +777,14 @@ public Path read(Kryo kryo, Input input, Class type) { } } + public static Set> cloneOperatorTree(Configuration conf, Set> roots) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); + serializePlan(roots, baos, conf, true); + Set> result = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), + roots.getClass(), conf, true); + return result; + } + private static void serializePlan(Object plan, OutputStream out, Configuration conf, boolean cloningPlan) { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN); @@ -3136,8 +3144,10 @@ public static void setInputPaths(JobConf job, List pathsToAdd) { * Set hive input format, and input format file if necessary. */ public static void setInputAttributes(Configuration conf, MapWork mWork) { + HiveConf.ConfVars var = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ? 
+ HiveConf.ConfVars.HIVETEZINPUTFORMAT : HiveConf.ConfVars.HIVEINPUTFORMAT; if (mWork.getInputformat() != null) { - HiveConf.setVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT, mWork.getInputformat()); + HiveConf.setVar(conf, var, mWork.getInputformat()); } if (mWork.getIndexIntermediateFile() != null) { conf.set("hive.index.compact.file", mWork.getIndexIntermediateFile()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 642841f..1fbc57d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -17,18 +17,27 @@ */ package org.apache.hadoop.hive.ql.exec.tez; +import com.google.common.base.Function; +import com.google.common.collect.Iterators; + import java.io.FileNotFoundException; import java.io.IOException; +import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import javax.security.auth.login.LoginException; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -41,6 +50,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper; import org.apache.hadoop.hive.ql.exec.mr.ExecReducer; +import org.apache.hadoop.hive.ql.exec.tez.tools.TezMergedLogicalInput; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveKey; @@ -64,18 +74,26 @@ import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.Records; +import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.Edge; import org.apache.tez.dag.api.EdgeProperty; import org.apache.tez.dag.api.EdgeProperty.DataMovementType; import org.apache.tez.dag.api.EdgeProperty.DataSourceType; import org.apache.tez.dag.api.EdgeProperty.SchedulingType; +import org.apache.tez.dag.api.GroupInputEdge; import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.ProcessorDescriptor; import org.apache.tez.dag.api.Vertex; +import org.apache.tez.dag.api.VertexLocationHint; +import org.apache.tez.dag.api.TezException; +import org.apache.tez.client.PreWarmContext; +import org.apache.tez.client.TezSessionConfiguration; +import org.apache.tez.dag.api.VertexGroup; import org.apache.tez.mapreduce.common.MRInputAMSplitGenerator; import org.apache.tez.mapreduce.hadoop.InputSplitInfo; import org.apache.tez.mapreduce.hadoop.MRHelpers; @@ -84,6 +102,8 @@ import org.apache.tez.mapreduce.input.MRInputLegacy; import org.apache.tez.mapreduce.output.MROutput; import org.apache.tez.mapreduce.partition.MRPartitioner; +import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput; +import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValuesInput; 
import org.apache.tez.runtime.library.input.ShuffledMergedInputLegacy; import org.apache.tez.runtime.library.input.ShuffledUnorderedKVInput; import org.apache.tez.runtime.library.output.OnFileSortedOutput; @@ -96,9 +116,36 @@ */ public class DagUtils { + private static final Log LOG = LogFactory.getLog(DagUtils.class.getName()); private static final String TEZ_DIR = "_tez_scratch_dir"; private static DagUtils instance; + private void addCredentials(MapWork mapWork, DAG dag) { + Set paths = mapWork.getPathToAliases().keySet(); + if (paths != null && !paths.isEmpty()) { + Iterator pathIterator = Iterators.transform(paths.iterator(), new Function() { + @Override + public URI apply(String input) { + return new Path(input).toUri(); + } + }); + + Set uris = new HashSet(); + Iterators.addAll(uris, pathIterator); + + if (LOG.isDebugEnabled()) { + for (URI uri: uris) { + LOG.debug("Marking URI as needing credentials: "+uri); + } + } + dag.addURIsForCredentials(uris); + } + } + + private void addCredentials(ReduceWork reduceWork, DAG dag) { + // nothing at the moment + } + /* * Creates the configuration object necessary to run a specific vertex from * map work. This includes input formats, input processor, etc. @@ -132,7 +179,7 @@ private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) { Utilities.setInputAttributes(conf, mapWork); - String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); + String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT); if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); } @@ -148,9 +195,56 @@ private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) { } /** + * Given a Vertex group and a vertex createEdge will create an + * Edge between them. + * + * @param group The parent VertexGroup + * @param wConf The job conf of the child vertex + * @param w The child vertex + * @param edgeType the type of connection between the two + * endpoints. + */ + public GroupInputEdge createEdge(VertexGroup group, JobConf wConf, + Vertex w, EdgeType edgeType) + throws IOException { + + Class mergeInputClass; + + LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getVertexName()); + w.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(wConf)); + + switch (edgeType) { + case BROADCAST_EDGE: + mergeInputClass = ConcatenatedMergedKeyValueInput.class; + break; + + case SIMPLE_EDGE: + default: + mergeInputClass = TezMergedLogicalInput.class; + break; + } + + return new GroupInputEdge(group, w, createEdgeProperty(edgeType), + new InputDescriptor(mergeInputClass.getName())); + } + + /** + * Given two vertices a, b update their configurations to be used in an Edge a-b + */ + public void updateConfigurationForEdge(JobConf vConf, Vertex v, JobConf wConf, Vertex w) + throws IOException { + + // Tez needs to setup output subsequent input pairs correctly + MultiStageMRConfToTezTranslator.translateVertexConfToTez(wConf, vConf); + + // update payloads (configuration for the vertices might have changed) + v.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(vConf)); + w.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(wConf)); + } + + /** * Given two vertices and their respective configuration objects createEdge - * will create an Edge object that connects the two. Currently the edge will - * always be a stable bi-partite edge. 
+ * will create an Edge object that connects the two. * * @param vConf JobConf of the first vertex * @param v The first vertex (source) @@ -162,13 +256,15 @@ public Edge createEdge(JobConf vConf, Vertex v, JobConf wConf, Vertex w, EdgeType edgeType) throws IOException { - // Tez needs to setup output subsequent input pairs correctly - MultiStageMRConfToTezTranslator.translateVertexConfToTez(wConf, vConf); + updateConfigurationForEdge(vConf, v, wConf, w); - // update payloads (configuration for the vertices might have changed) - v.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(vConf)); - w.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(wConf)); + return new Edge(v, w, createEdgeProperty(edgeType)); + } + /* + * Helper function to create an edge property from an edge type. + */ + private EdgeProperty createEdgeProperty(EdgeType edgeType) { DataMovementType dataMovementType; Class logicalInputClass; Class logicalOutputClass; @@ -194,10 +290,40 @@ public Edge createEdge(JobConf vConf, Vertex v, JobConf wConf, Vertex w, SchedulingType.SEQUENTIAL, new OutputDescriptor(logicalOutputClass.getName()), new InputDescriptor(logicalInputClass.getName())); - return new Edge(v, w, edgeProperty); + + return edgeProperty; } /* + * Helper to determine the size of the container requested + * from yarn. Falls back to Map-reduce's map size if tez + * container size isn't set. + */ + private Resource getContainerResource(Configuration conf) { + Resource containerResource; + int memory = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) > 0 ? + HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) : + conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB); + int cpus = conf.getInt(MRJobConfig.MAP_CPU_VCORES, + MRJobConfig.DEFAULT_MAP_CPU_VCORES); + return Resource.newInstance(memory, cpus); + } + + /* + * Helper to determine what java options to use for the containers + * Falls back to Map-reduces map java opts if no tez specific options + * are set + */ + private String getContainerJavaOpts(Configuration conf) { + String javaOpts = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZJAVAOPTS); + if (javaOpts != null && !javaOpts.isEmpty()) { + return javaOpts; + } + return MRHelpers.getMapJavaOpts(conf); + } + + + /* * Helper function to create Vertex from MapWork. */ private Vertex createVertex(JobConf conf, MapWork mapWork, @@ -248,12 +374,11 @@ private Vertex createVertex(JobConf conf, MapWork mapWork, byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf); map = new Vertex(mapWork.getName(), new ProcessorDescriptor(MapTezProcessor.class.getName()). 
- setUserPayload(serializedConf), numTasks, - MRHelpers.getMapResource(conf)); + setUserPayload(serializedConf), numTasks, getContainerResource(conf)); Map environment = new HashMap(); MRHelpers.updateEnvironmentForMRTasks(conf, environment, true); map.setTaskEnvironment(environment); - map.setJavaOpts(MRHelpers.getMapJavaOpts(conf)); + map.setJavaOpts(getContainerJavaOpts(conf)); assert mapWork.getAliasToWork().keySet().size() == 1; @@ -262,7 +387,7 @@ private Vertex createVertex(JobConf conf, MapWork mapWork, byte[] mrInput = null; if (useTezGroupedSplits) { mrInput = MRHelpers.createMRInputPayloadWithGrouping(serializedConf, - null, HiveInputFormat.class.getName()); + HiveInputFormat.class.getName()); } else { mrInput = MRHelpers.createMRInputPayload(serializedConf, null); } @@ -323,14 +448,14 @@ private Vertex createVertex(JobConf conf, ReduceWork reduceWork, Vertex reducer = new Vertex(reduceWork.getName(), new ProcessorDescriptor(ReduceTezProcessor.class.getName()). setUserPayload(MRHelpers.createUserPayloadFromConf(conf)), - reduceWork.getNumReduceTasks(), MRHelpers.getReduceResource(conf)); + reduceWork.getNumReduceTasks(), getContainerResource(conf)); Map environment = new HashMap(); MRHelpers.updateEnvironmentForMRTasks(conf, environment, false); reducer.setTaskEnvironment(environment); - reducer.setJavaOpts(MRHelpers.getReduceJavaOpts(conf)); + reducer.setJavaOpts(getContainerJavaOpts(conf)); Map localResources = new HashMap(); localResources.put(getBaseName(appJarLr), appJarLr); @@ -370,6 +495,49 @@ private LocalResource createLocalResource(FileSystem remoteFs, Path file, } /** + * @param sessionConfig session configuration + * @param numContainers number of containers to pre-warm + * @param localResources additional resources to pre-warm with + * @return prewarm context object + */ + public PreWarmContext createPreWarmContext(TezSessionConfiguration sessionConfig, int numContainers, + Map localResources) throws IOException, TezException { + + Configuration conf = sessionConfig.getTezConfiguration(); + + ProcessorDescriptor prewarmProcDescriptor = new ProcessorDescriptor(HivePreWarmProcessor.class.getName()); + prewarmProcDescriptor.setUserPayload(MRHelpers.createUserPayloadFromConf(conf)); + + PreWarmContext context = new PreWarmContext(prewarmProcDescriptor, getContainerResource(conf), + numContainers, new VertexLocationHint(null)); + + Map combinedResources = new HashMap(); + + combinedResources.putAll(sessionConfig.getSessionResources()); + + try { + for(LocalResource lr : localizeTempFiles(conf)) { + combinedResources.put(getBaseName(lr), lr); + } + } catch(LoginException le) { + throw new IOException(le); + } + + if(localResources != null) { + combinedResources.putAll(localResources); + } + + context.setLocalResources(combinedResources); + + /* boiler plate task env */ + Map environment = new HashMap(); + MRHelpers.updateEnvironmentForMRTasks(conf, environment, true); + context.setEnvironment(environment); + context.setJavaOpts(getContainerJavaOpts(conf)); + return context; + } + + /** * @param conf * @return path to destination directory on hdfs * @throws LoginException if we are unable to figure user information @@ -651,6 +819,17 @@ public Vertex createVertex(JobConf conf, BaseWork work, } /** + * Set up credentials for the base work on secure clusters + */ + public void addCredentials(BaseWork work, DAG dag) { + if (work instanceof MapWork) { + addCredentials((MapWork) work, dag); + } else if (work instanceof ReduceWork) { + addCredentials((ReduceWork) work, dag); + } 
+ } + + /** + * createTezDir creates a temporary directory in the scratchDir folder to + * be used with Tez. Assumes scratchDir exists. */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java new file mode 100644 index 0000000..c756e72 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.io.ReadaheadPool; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.tez.common.TezUtils; +import org.apache.tez.runtime.api.Event; +import org.apache.tez.runtime.api.LogicalIOProcessor; +import org.apache.tez.runtime.api.LogicalInput; +import org.apache.tez.runtime.api.LogicalOutput; +import org.apache.tez.runtime.api.TezProcessorContext; + +import java.net.URL; +import java.net.JarURLConnection; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import java.util.Map; +import java.util.jar.JarFile; +import java.util.jar.JarEntry; + +import javax.crypto.Mac; + +/** + * A processor used to pre-warm Tez containers for Hive: it forces the class loading + * and one-time (singleton) initialization that most queries need, so that reused + * containers start with a warm JVM. 
+ * + * @see Config for configuring the HivePreWarmProcessor + */ +public class HivePreWarmProcessor implements LogicalIOProcessor { + + private static boolean prewarmed = false; + + private static final Log LOG = LogFactory.getLog(HivePreWarmProcessor.class); + + private Configuration conf; + + @Override + public void initialize(TezProcessorContext processorContext) + throws Exception { + byte[] userPayload = processorContext.getUserPayload(); + this.conf = TezUtils.createConfFromUserPayload(userPayload); + } + + @Override + public void run(Map inputs, + Map outputs) throws Exception { + if(prewarmed) { + /* container reuse */ + return; + } + for (LogicalInput input : inputs.values()) { + input.start(); + } + for (LogicalOutput output : outputs.values()) { + output.start(); + } + /* these are things that goes through singleton initialization on most queries */ + FileSystem fs = FileSystem.get(conf); + Mac mac = Mac.getInstance("HmacSHA1"); + ReadaheadPool rpool = ReadaheadPool.getInstance(); + ShimLoader.getHadoopShims(); + + URL hiveurl = new URL("jar:"+DagUtils.getInstance().getExecJarPathLocal()+"!/"); + JarURLConnection hiveconn = (JarURLConnection)hiveurl.openConnection(); + JarFile hivejar = hiveconn.getJarFile(); + try { + Enumeration classes = hivejar.entries(); + while(classes.hasMoreElements()) { + JarEntry je = classes.nextElement(); + if (je.getName().endsWith(".class")) { + String klass = je.getName().replace(".class","").replaceAll("/","\\."); + if(klass.indexOf("ql.exec") != -1 || klass.indexOf("ql.io") != -1) { + /* several hive classes depend on the metastore APIs, which is not included + * in hive-exec.jar. These are the relatively safe ones - operators & io classes. + */ + if(klass.indexOf("vector") != -1 || klass.indexOf("Operator") != -1) { + Class.forName(klass); + } + } + } + } + } finally { + hivejar.close(); + } + prewarmed = true; + } + + @Override + public void handleEvents(List processorEvents) { + // Nothing to do + } + + @Override + public void close() throws Exception { + // Nothing to cleanup + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 7c2c2a6..d89f2c7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -55,7 +55,6 @@ import org.apache.tez.mapreduce.processor.MRTaskReporter; import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.library.api.KeyValuesReader; -import org.apache.tez.runtime.library.input.ShuffledMergedInput; /** * Process input from tez LogicalInput and write output - for a map plan @@ -184,15 +183,19 @@ void init(JobConf jconf, MRTaskReporter mrReporter, Map in @Override void run() throws IOException{ - List shuffleInputs = getShuffleInputs(inputs); + List shuffleInputs = getShuffleInputs(inputs); KeyValuesReader kvsReader; - if(shuffleInputs.size() == 1){ - //no merging of inputs required - kvsReader = shuffleInputs.get(0).getReader(); - }else { - //get a sort merged input - kvsReader = new InputMerger(shuffleInputs); + try { + if(shuffleInputs.size() == 1){ + //no merging of inputs required + kvsReader = (KeyValuesReader) shuffleInputs.get(0).getReader(); + }else { + //get a sort merged input + kvsReader = new InputMerger(shuffleInputs); + } + } catch (Exception e) { + throw new IOException(e); } while(kvsReader.next()){ @@ -211,12 +214,12 @@ void run() throws IOException{ * 
@param inputs * @return */ - private List getShuffleInputs(Map inputs) { + private List getShuffleInputs(Map inputs) { //the reduce plan inputs have tags, add all inputs that have tags Map tag2input = redWork.getTagToInput(); - ArrayList shuffleInputs = new ArrayList(); + ArrayList shuffleInputs = new ArrayList(); for(String inpStr : tag2input.values()){ - shuffleInputs.add((ShuffledMergedInput)inputs.get(inpStr)); + shuffleInputs.add((LogicalInput)inputs.get(inpStr)); } return shuffleInputs; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java index 9c3284b..9be2aa2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java @@ -124,6 +124,12 @@ public void run(Map inputs, Map out // (possibly asynchronously) LOG.info("Running map: " + processorContext.getUniqueIdentifier()); + for (LogicalInput input : inputs.values()) { + input.start(); + } + for (LogicalOutput output : outputs.values()) { + output.start(); + } Map outMap = new HashMap(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java index b8552a3..aef6e68 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.tez.client.AMConfiguration; @@ -43,7 +44,7 @@ import org.apache.tez.dag.api.SessionNotRunning; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; -import org.apache.tez.mapreduce.hadoop.MRHelpers; +import org.apache.tez.client.PreWarmContext; /** * Holds session state related to Tez @@ -134,8 +135,24 @@ public void open(String sessionId, HiveConf conf) session = new TezSession("HIVE-"+sessionId, sessionConfig); LOG.info("Opening new Tez Session (id: "+sessionId+", scratch dir: "+tezScratchDir+")"); + session.start(); + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) { + int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS); + LOG.info("Prewarming " + n + " containers (id: " + sessionId + + ", scratch dir: " + tezScratchDir + ")"); + PreWarmContext context = utils.createPreWarmContext(sessionConfig, n, + commonLocalResources); + try { + session.preWarm(context); + } catch (InterruptedException ie) { + if (LOG.isDebugEnabled()) { + LOG.debug("Hive Prewarm threw an exception ", ie); + } + } + } + // In case we need to run some MR jobs, we'll run them under tez MR emulation. The session // id is used for tez to reuse the current session rather than start a new one. 
conf.set("mapreduce.framework.name", "yarn-tez"); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index bef5ba3..d30ec8e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -42,6 +43,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.UnionWork; import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; @@ -54,9 +56,11 @@ import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.Edge; +import org.apache.tez.dag.api.GroupInputEdge; import org.apache.tez.dag.api.SessionNotRunning; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.Vertex; +import org.apache.tez.dag.api.VertexGroup; import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.StatusGetOpts; @@ -97,9 +101,6 @@ public int execute(DriverContext driverContext) { DAGClient client = null; TezSessionState session = null; - // Tez requires us to use RPC for the query plan - HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true); - try { // Get or create Context object. If we create it we have to clean // it later as well. @@ -206,9 +207,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir, FileSystem fs = tezDir.getFileSystem(conf); // the name of the dag is what is displayed in the AM/Job UI - DAG dag = new DAG( - Utilities.abbreviate(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING), - HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEJOBNAMELENGTH))); + DAG dag = new DAG(work.getName()); for (BaseWork w: ws) { @@ -216,23 +215,68 @@ DAG build(JobConf conf, TezWork work, Path scratchDir, // translate work to vertex perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName()); - JobConf wxConf = utils.initializeVertexConf(conf, w); - Vertex wx = utils.createVertex(wxConf, w, tezDir, - appJarLr, additionalLr, fs, ctx, !isFinal); - dag.addVertex(wx); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName()); - workToVertex.put(w, wx); - workToConf.put(w, wxConf); - - // add all dependencies (i.e.: edges) to the graph - for (BaseWork v: work.getChildren(w)) { - assert workToVertex.containsKey(v); - Edge e = null; - - EdgeType edgeType = work.getEdgeProperty(w, v); - - e = utils.createEdge(wxConf, wx, workToConf.get(v), workToVertex.get(v), edgeType); - dag.addEdge(e); + + if (w instanceof UnionWork) { + // Special case for unions. 
These items translate to VertexGroups + + List unionWorkItems = new LinkedList(); + List children = new LinkedList(); + + // split the children into vertices that make up the union and vertices that are + // proper children of the union + for (BaseWork v: work.getChildren(w)) { + EdgeType type = work.getEdgeProperty(w, v); + if (type == EdgeType.CONTAINS) { + unionWorkItems.add(v); + } else { + children.add(v); + } + } + + // create VertexGroup + Vertex[] vertexArray = new Vertex[unionWorkItems.size()]; + + int i = 0; + for (BaseWork v: unionWorkItems) { + vertexArray[i++] = workToVertex.get(v); + } + VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray); + + // now hook up the children + for (BaseWork v: children) { + // need to pairwise patch up the configuration of the vertices + for (BaseWork part: unionWorkItems) { + utils.updateConfigurationForEdge(workToConf.get(part), workToVertex.get(part), + workToConf.get(v), workToVertex.get(v)); + } + + // finally we can create the grouped edge + GroupInputEdge e = utils.createEdge(group, workToConf.get(v), + workToVertex.get(v), work.getEdgeProperty(w, v)); + + dag.addEdge(e); + } + } else { + // Regular vertices + JobConf wxConf = utils.initializeVertexConf(conf, w); + Vertex wx = utils.createVertex(wxConf, w, tezDir, appJarLr, + additionalLr, fs, ctx, !isFinal); + dag.addVertex(wx); + utils.addCredentials(w, dag); + perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName()); + workToVertex.put(w, wx); + workToConf.put(w, wxConf); + + // add all dependencies (i.e.: edges) to the graph + for (BaseWork v: work.getChildren(w)) { + assert workToVertex.containsKey(v); + Edge e = null; + + EdgeType edgeType = work.getEdgeProperty(w, v); + + e = utils.createEdge(wxConf, wx, workToConf.get(v), workToVertex.get(v), edgeType); + dag.addEdge(e); + } } } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java index e5746c4..726e122 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java @@ -26,12 +26,13 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor; import org.apache.hadoop.io.BinaryComparable; +import org.apache.tez.runtime.api.Input; +import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.library.api.KeyValuesReader; -import org.apache.tez.runtime.library.input.ShuffledMergedInput; /** * A KeyValuesReader implementation that returns a sorted stream of key-values - * by doing a sorted merge of the key-value in ShuffledMergedInputs. + * by doing a sorted merge of the key-value in LogicalInputs. * Tags are in the last byte of the key, so no special handling for tags is required. * Uses a priority queue to pick the KeyValuesReader of the input that is next in * sort order. 
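(Aside: the merge performed by InputMerger above is a standard k-way sorted merge driven by a priority queue that always yields the reader whose current key sorts first, re-inserting it after each element. A minimal, self-contained sketch of the same idea follows; it is not part of the patch and uses plain Java iterators of strings in place of Tez's KeyValuesReader, with all names purely illustrative.)

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

// Illustrative k-way sorted merge: each "input" is an already-sorted iterator, and the
// priority queue always yields the input whose current element is smallest, mirroring
// how InputMerger picks the next KeyValuesReader by comparing current keys.
public class KWayMergeSketch {

  // Wraps an iterator together with its current element so it can sit in the queue.
  private static final class Head implements Comparable<Head> {
    String current;
    final Iterator<String> rest;

    Head(Iterator<String> it) {
      this.rest = it;
      this.current = it.next(); // caller guarantees non-empty input
    }

    @Override
    public int compareTo(Head other) {
      return current.compareTo(other.current);
    }
  }

  public static List<String> merge(List<List<String>> sortedInputs) {
    PriorityQueue<Head> queue = new PriorityQueue<Head>(Math.max(1, sortedInputs.size()));
    for (List<String> input : sortedInputs) {
      if (!input.isEmpty()) {
        queue.add(new Head(input.iterator()));
      }
    }

    List<String> out = new ArrayList<String>();
    while (!queue.isEmpty()) {
      Head smallest = queue.poll();     // input whose current element sorts first
      out.add(smallest.current);
      if (smallest.rest.hasNext()) {    // re-insert with its next element, like addToQueue()
        smallest.current = smallest.rest.next();
        queue.add(smallest);
      }
    }
    return out;
  }

  public static void main(String[] args) {
    System.out.println(merge(Arrays.asList(
        Arrays.asList("a", "d", "g"),
        Arrays.asList("b", "e"),
        Arrays.asList("c", "f", "h"))));
    // prints [a, b, c, d, e, f, g, h]
  }
}
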
@@ -42,12 +43,12 @@ private PriorityQueue pQueue = null; private KeyValuesReader nextKVReader = null; - public InputMerger(List shuffleInputs) throws IOException { - //get KeyValuesReaders from the ShuffledMergedInput and add them to priority queue + public InputMerger(List shuffleInputs) throws Exception { + //get KeyValuesReaders from the LogicalInput and add them to priority queue int initialCapacity = shuffleInputs.size(); pQueue = new PriorityQueue(initialCapacity, new KVReaderComparator()); - for(ShuffledMergedInput input : shuffleInputs){ - addToQueue(input.getReader()); + for(Input input : shuffleInputs){ + addToQueue((KeyValuesReader)input.getReader()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/TezMergedLogicalInput.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/TezMergedLogicalInput.java new file mode 100644 index 0000000..c4b99e5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/TezMergedLogicalInput.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.tez.tools; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.tez.runtime.api.Input; +import org.apache.tez.runtime.api.LogicalInput; +import org.apache.tez.runtime.api.MergedLogicalInput; +import org.apache.tez.runtime.api.Reader; + +/** + * TezMergedLogicalInput is an adapter to make union input look like + * a single input in tez. 
+ */ +public class TezMergedLogicalInput extends MergedLogicalInput { + + @Override + public Reader getReader() throws Exception { + return new InputMerger(getInputs()); + } + + @Override + public void setConstituentInputIsReady(Input input) { + // ignore notification + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java index 427ea12..c39e823 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java @@ -152,6 +152,7 @@ private void createAggregationFunction(List indexTblCols, String pr HiveConf builderConf = new HiveConf(getConf(), AggregateIndexHandler.class); builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES, false); builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false); + builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGETEZFILES, false); Task rootTask = IndexUtils.createRootTask(builderConf, inputs, outputs, command, (LinkedHashMap) partSpec, indexTableName, dbName); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java index 11ddcae..0135a71 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java @@ -144,6 +144,7 @@ public void analyzeIndexDefinition(Table baseTable, Index index, HiveConf builderConf = new HiveConf(getConf(), CompactIndexHandler.class); builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES, false); builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false); + builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGETEZFILES, false); Task rootTask = IndexUtils.createRootTask(builderConf, inputs, outputs, command, partSpec, indexTableName, dbName); return rootTask; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java new file mode 100644 index 0000000..41a77b0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.IOException; +import java.nio.ByteBuffer; + +public interface DirectDecompressionCodec extends CompressionCodec { + public boolean isAvailable(); + public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java index 6da3d03..74ba971 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java @@ -21,8 +21,13 @@ import java.io.InputStream; import java.nio.ByteBuffer; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + abstract class InStream extends InputStream { + private static final Log LOG = LogFactory.getLog(InStream.class); + private static class UncompressedStream extends InStream { private final String name; private final ByteBuffer[] bytes; @@ -172,7 +177,7 @@ private void readHeader() throws IOException { bufferSize + " needed = " + chunkLength); } // read 3 bytes, which should be equal to OutStream.HEADER_SIZE always - assert OutStream.HEADER_SIZE == 3 : "The Orc HEADER_SIZE must be the same in OutStream and InStream"; + assert OutStream.HEADER_SIZE == 3 : "The Orc HEADER_SIZE must be the same in OutStream and InStream"; currentOffset += OutStream.HEADER_SIZE; ByteBuffer slice = this.slice(chunkLength); @@ -274,14 +279,23 @@ private ByteBuffer slice(int chunkLength) throws IOException { chunkLength + " bytes"); } + if (LOG.isDebugEnabled()) { + LOG.debug(String.format( + "Crossing into next BufferChunk because compressed only has %d bytes (needs %d)", + compressed.remaining(), len)); + } + // we need to consolidate 2 or more buffers into 1 - // first clear out compressed buffers + // first copy out compressed buffers ByteBuffer copy = allocateBuffer(chunkLength); currentOffset += compressed.remaining(); len -= compressed.remaining(); copy.put(compressed); while (len > 0 && (++currentRange) < bytes.length) { + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Read slow-path, >1 cross block reads with %s", this.toString())); + } compressed = bytes[currentRange].duplicate(); if (compressed.remaining() >= len) { slice = compressed.slice(); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java index 9af12de..ac56702 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java @@ -122,6 +122,9 @@ synchronized void removeWriter(Path path) throws IOException { totalAllocation -= val.allocation; updateScale(false); } + if(writerList.isEmpty()) { + rowsAddedSinceCheck = 0; + } } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 7798a7c..ef68d1a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -28,7 +28,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; +import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -36,6 +38,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import 
org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_ZEROCOPY; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; @@ -53,6 +57,10 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.HadoopShims.*; + +import com.google.common.collect.ComparisonChain; class RecordReaderImpl implements RecordReader { @@ -87,6 +95,89 @@ private boolean[] includedRowGroups = null; private final Configuration conf; + private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool(); + private final ZeroCopyReaderShim zcr; + + // this is an implementation copied from ElasticByteBufferPool in hadoop-2, + // which lacks a clear()/clean() operation + public final static class ByteBufferAllocatorPool implements ByteBufferPoolShim { + private static final class Key implements Comparable { + private final int capacity; + private final long insertionGeneration; + + Key(int capacity, long insertionGeneration) { + this.capacity = capacity; + this.insertionGeneration = insertionGeneration; + } + + @Override + public int compareTo(Key other) { + return ComparisonChain.start().compare(capacity, other.capacity) + .compare(insertionGeneration, other.insertionGeneration).result(); + } + + @Override + public boolean equals(Object rhs) { + if (rhs == null) { + return false; + } + try { + Key o = (Key) rhs; + return (compareTo(o) == 0); + } catch (ClassCastException e) { + return false; + } + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(capacity).append(insertionGeneration) + .toHashCode(); + } + } + + private final TreeMap buffers = new TreeMap(); + + private final TreeMap directBuffers = new TreeMap(); + + private long currentGeneration = 0; + + private final TreeMap getBufferTree(boolean direct) { + return direct ? directBuffers : buffers; + } + + public void clear() { + buffers.clear(); + directBuffers.clear(); + } + + @Override + public ByteBuffer getBuffer(boolean direct, int length) { + TreeMap tree = getBufferTree(direct); + Map.Entry entry = tree.ceilingEntry(new Key(length, 0)); + if (entry == null) { + return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer + .allocate(length); + } + tree.remove(entry.getKey()); + return entry.getValue(); + } + + @Override + public void putBuffer(ByteBuffer buffer) { + TreeMap tree = getBufferTree(buffer.isDirect()); + while (true) { + Key key = new Key(buffer.capacity(), currentGeneration++); + if (!tree.containsKey(key)) { + tree.put(key, buffer); + return; + } + // Buffers are indexed by (capacity, generation). 
+ // If our key is not unique on the first try, we try again + } + } + } + RecordReaderImpl(Iterable stripes, FileSystem fileSystem, Path path, @@ -130,6 +221,18 @@ } } + final boolean zeroCopy = (conf != null) + && (HiveConf.getBoolVar(conf, HIVE_ORC_ZEROCOPY)); + + if (zeroCopy + && (codec == null || ((codec instanceof DirectDecompressionCodec) + && ((DirectDecompressionCodec) codec).isAvailable()))) { + /* codec is null or is available */ + this.zcr = ShimLoader.getHadoopShims().getZeroCopyReader(file, pool); + } else { + this.zcr = null; + } + firstRow = skippedRows; totalRowCount = rows; reader = createTreeReader(path, 0, types, included, conf); @@ -2283,6 +2386,11 @@ private void clearStreams() throws IOException { is.close(); } if(bufferChunks != null) { + if(zcr != null) { + for (BufferChunk bufChunk : bufferChunks) { + zcr.releaseBuffer(bufChunk.chunk); + } + } bufferChunks.clear(); } streams.clear(); @@ -2599,10 +2707,20 @@ static void mergeDiskRanges(List ranges) { for(DiskRange range: ranges) { int len = (int) (range.end - range.offset); long off = range.offset; - file.seek(base + off); - byte[] buffer = new byte[len]; - file.readFully(buffer, 0, buffer.length); - result.add(new BufferChunk(ByteBuffer.wrap(buffer), range.offset)); + file.seek(base + off); + if(zcr != null) { + while(len > 0) { + ByteBuffer partial = zcr.readBuffer(len, false); + result.add(new BufferChunk(partial, off)); + int read = partial.remaining(); + len -= read; + off += read; + } + } else { + byte[] buffer = new byte[len]; + file.readFully(buffer, 0, buffer.length); + result.add(new BufferChunk(ByteBuffer.wrap(buffer), range.offset)); + } } return result; } @@ -2840,6 +2958,7 @@ public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOExcept @Override public void close() throws IOException { clearStreams(); + pool.clear(); file.close(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java index e3131a3..4613015 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java @@ -18,12 +18,17 @@ package org.apache.hadoop.hive.ql.io.orc; +import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType; import org.iq80.snappy.Snappy; import java.io.IOException; import java.nio.ByteBuffer; -class SnappyCodec implements CompressionCodec { +class SnappyCodec implements CompressionCodec, DirectDecompressionCodec { + + Boolean direct = null; @Override public boolean compress(ByteBuffer in, ByteBuffer out, @@ -57,6 +62,10 @@ public boolean compress(ByteBuffer in, ByteBuffer out, @Override public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { + if(in.isDirect() && out.isDirect()) { + directDecompress(in, out); + return; + } int inOffset = in.position(); int uncompressLen = Snappy.uncompress(in.array(), in.arrayOffset() + inOffset, @@ -64,4 +73,30 @@ public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { out.position(uncompressLen + out.position()); out.flip(); } + + @Override + public boolean isAvailable() { + if (direct == null) { + try { + if (ShimLoader.getHadoopShims().getDirectDecompressor( + DirectCompressionType.SNAPPY) != null) { + direct = Boolean.valueOf(true); + } else { + direct = Boolean.valueOf(false); + } + } catch (UnsatisfiedLinkError ule) { + direct = 
Boolean.valueOf(false); + } + } + return direct.booleanValue(); + } + + @Override + public void directDecompress(ByteBuffer in, ByteBuffer out) + throws IOException { + DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims() + .getDirectDecompressor(DirectCompressionType.SNAPPY); + decompressShim.decompress(in, out); + out.flip(); // flip for read + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java index 27a9338..660d8e3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java @@ -67,6 +67,7 @@ this.offset = fileSplit.getStart(); this.length = fileSplit.getLength(); this.reader = file.rows(offset, length, includedColumns, sarg, columnNames); + try { rbCtx = new VectorizedRowBatchCtx(); rbCtx.init(conf, fileSplit); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java index a75fdea..27fbb42 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java @@ -23,7 +23,14 @@ import java.util.zip.Deflater; import java.util.zip.Inflater; -class ZlibCodec implements CompressionCodec { +import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType; +import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; +import org.apache.hadoop.hive.shims.ShimLoader; + +class ZlibCodec implements CompressionCodec, DirectDecompressionCodec { + + private Boolean direct = null; @Override public boolean compress(ByteBuffer in, ByteBuffer out, @@ -55,6 +62,12 @@ public boolean compress(ByteBuffer in, ByteBuffer out, @Override public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { + + if(in.isDirect() && out.isDirect()) { + directDecompress(in, out); + return; + } + Inflater inflater = new Inflater(true); inflater.setInput(in.array(), in.arrayOffset() + in.position(), in.remaining()); @@ -74,4 +87,30 @@ public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { in.position(in.limit()); } + @Override + public boolean isAvailable() { + if (direct == null) { + // see nowrap option in new Inflater(boolean) which disables zlib headers + try { + if (ShimLoader.getHadoopShims().getDirectDecompressor( + DirectCompressionType.ZLIB_NOHEADER) != null) { + direct = Boolean.valueOf(true); + } else { + direct = Boolean.valueOf(false); + } + } catch (UnsatisfiedLinkError ule) { + direct = Boolean.valueOf(false); + } + } + return direct.booleanValue(); + } + + @Override + public void directDecompress(ByteBuffer in, ByteBuffer out) + throws IOException { + DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims() + .getDirectDecompressor(DirectCompressionType.ZLIB_NOHEADER); + decompressShim.decompress(in, out); + out.flip(); // flip for read + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index d2aa220..a6b0a56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -1253,7 +1253,7 @@ public static void createMRWorkForMergingFiles (FileSinkOperator fsInput, } else { cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc); if 
(conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { - work = new TezWork(); + work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)); cplan.setName("Merge"); ((TezWork)work).add(cplan); } else { @@ -1622,6 +1622,13 @@ public static boolean isMergeRequired(List> mvTasks, HiveConf hco } if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) { + + if (currTask.getWork() instanceof TezWork) { + // tez blurs the boundary between map and reduce, thus it has it's own + // config + return hconf.getBoolVar(ConfVars.HIVEMERGETEZFILES); + } + if (fsOp.getConf().isLinkedFileSink()) { // If the user has HIVEMERGEMAPREDFILES set to false, the idea was the // number of reducers are few, so the number of files anyway are small. @@ -1635,16 +1642,13 @@ public static boolean isMergeRequired(List> mvTasks, HiveConf hco // There are separate configuration parameters to control whether to // merge for a map-only job // or for a map-reduce job - if (currTask.getWork() instanceof TezWork) { - return hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || - hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES); - } else if (currTask.getWork() instanceof MapredWork) { + if (currTask.getWork() instanceof MapredWork) { ReduceWork reduceWork = ((MapredWork) currTask.getWork()).getReduceWork(); boolean mergeMapOnly = - hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null; + hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null; boolean mergeMapRed = - hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && - reduceWork != null; + hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && + reduceWork != null; if (mergeMapOnly || mergeMapRed) { return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java index 9592992..9aed5fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java @@ -51,28 +51,9 @@ public Object process(Node nd, Stack stack, GenTezProcContext context = (GenTezProcContext) procCtx; FileSinkOperator fileSink = (FileSinkOperator) nd; - ParseContext parseContext = context.parseContext; - - - boolean isInsertTable = // is INSERT OVERWRITE TABLE - GenMapRedUtils.isInsertInto(parseContext, fileSink); - HiveConf hconf = parseContext.getConf(); - - boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, - hconf, fileSink, context.currentTask, isInsertTable); - - Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, - chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask); - - if (chDir) { - // Merge the files in the destination table/partitions by creating Map-only merge job - // If underlying data is RCFile a RCFileBlockMerge task would be created. 
- LOG.info("using CombineHiveInputformat for the merge job"); - GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, - context.dependencyTask, context.moveTask, - hconf, context.currentTask); - } + // just remember it for later processing + context.fileSinkSet.add(fileSink); return true; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java index f4b6016..7581e16 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java @@ -20,14 +20,19 @@ import java.io.Serializable; import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -39,6 +44,7 @@ import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.TezWork; /** @@ -105,6 +111,15 @@ // used to group dependent tasks for multi table inserts public final DependencyCollectionTask dependencyTask; + // used to hook up unions + public final Map, BaseWork> unionWorkMap; + public final List currentUnionOperators; + public final Set workWithUnionOperators; + + // we link filesink that will write to the same final location + public final Map> linkedFileSinks; + public final Set fileSinkSet; + @SuppressWarnings("unchecked") public GenTezProcContext(HiveConf conf, ParseContext parseContext, List> moveTask, List> rootTasks, @@ -116,7 +131,8 @@ public GenTezProcContext(HiveConf conf, ParseContext parseContext, this.rootTasks = rootTasks; this.inputs = inputs; this.outputs = outputs; - this.currentTask = (TezTask) TaskFactory.get(new TezWork(), conf); + this.currentTask = (TezTask) TaskFactory.get( + new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), conf); this.leafOperatorToFollowingWork = new HashMap, BaseWork>(); this.linkOpWithWorkMap = new HashMap, List>(); this.linkWorkWithReduceSinkMap = new HashMap>(); @@ -126,6 +142,11 @@ public GenTezProcContext(HiveConf conf, ParseContext parseContext, this.linkChildOpWithDummyOp = new HashMap, List>>(); this.dependencyTask = (DependencyCollectionTask) TaskFactory.get(new DependencyCollectionWork(), conf); + this.unionWorkMap = new HashMap, BaseWork>(); + this.currentUnionOperators = new LinkedList(); + this.workWithUnionOperators = new HashSet(); + this.linkedFileSinks = new HashMap>(); + this.fileSinkSet = new HashSet(); rootTasks.add(currentTask); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 042cb39..742532f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -18,17 +18,35 @@ package org.apache.hadoop.hive.ql.parse; 
+import java.util.ArrayList; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.LinkedList; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.fs.Path; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.UnionWork; import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType; /** @@ -59,6 +77,13 @@ public void resetSequenceNumber() { sequenceNumber = 0; } + public UnionWork createUnionWork(GenTezProcContext context, Operator operator, TezWork tezWork) { + UnionWork unionWork = new UnionWork("Union "+ (++sequenceNumber)); + context.unionWorkMap.put(operator, unionWork); + tezWork.add(unionWork); + return unionWork; + } + public ReduceWork createReduceWork(GenTezProcContext context, Operator root, TezWork tezWork) { assert !root.getParentOperators().isEmpty(); ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber)); @@ -121,11 +146,118 @@ public MapWork createMapWork(GenTezProcContext context, Operator root, } // this method's main use is to help unit testing this class - protected void setupMapWork(MapWork mapWork, GenTezProcContext context, - PrunedPartitionList partitions, Operator root, + protected void setupMapWork(MapWork mapWork, GenTezProcContext context, + PrunedPartitionList partitions, Operator root, String alias) throws SemanticException { // All the setup is done in GenMapRedUtils GenMapRedUtils.setMapWork(mapWork, context.parseContext, context.inputs, partitions, root, alias, context.conf, false); } + + // removes any union operator and clones the plan + public void removeUnionOperators(Configuration conf, GenTezProcContext context, + BaseWork work) + throws SemanticException { + + Set> roots = work.getAllRootOperators(); + + // need to clone the plan. + Set> newRoots = Utilities.cloneOperatorTree(conf, roots); + + Map, Operator> replacementMap = new HashMap, Operator>(); + + Iterator> it = newRoots.iterator(); + for (Operator orig: roots) { + replacementMap.put(orig,it.next()); + } + + // now we remove all the unions. we throw away any branch that's not reachable from + // the current set of roots. The reason is that those branches will be handled in + // different tasks. 
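// A worked example of the splice performed just below (hypothetical operator
// names, not taken from the patch). Suppose the cloned plan reachable from the
// current roots is:
//
//   TS_1 --> UNION_3 <-- TS_2,   UNION_3 --> FS_4
//
// and this walk started from TS_1 only. When UNION_3 is visited, TS_1 is the
// only parent already in `seen`, so parent.removeChildAndAdoptItsChildren(UNION_3)
// rewires the plan to TS_1 --> FS_4; the TS_2 branch is dropped here because it
// will be covered by the task generated for that other root.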
+ Deque> operators = new LinkedList>(); + operators.addAll(newRoots); + + Set> seen = new HashSet>(); + + while(!operators.isEmpty()) { + Operator current = operators.pop(); + seen.add(current); + + if (current instanceof FileSinkOperator) { + FileSinkOperator fileSink = (FileSinkOperator)current; + + // remember it for additional processing later + context.fileSinkSet.add(fileSink); + + FileSinkDesc desc = fileSink.getConf(); + Path path = desc.getDirName(); + List linked; + + if (!context.linkedFileSinks.containsKey(path)) { + linked = new ArrayList(); + context.linkedFileSinks.put(path, linked); + } + linked = context.linkedFileSinks.get(path); + linked.add(desc); + + desc.setDirName(new Path(path, ""+linked.size())); + desc.setLinkedFileSinkDesc(linked); + } + + if (current instanceof UnionOperator) { + Operator parent = null; + int count = 0; + + for (Operator op: current.getParentOperators()) { + if (seen.contains(op)) { + ++count; + parent = op; + } + } + + // we should have been able to reach the union from only one side. + assert count <= 1; + + if (parent == null) { + // root operator is union (can happen in reducers) + replacementMap.put(current, current.getChildOperators().get(0)); + } else { + parent.removeChildAndAdoptItsChildren(current); + } + } + + if (current instanceof FileSinkOperator + || current instanceof ReduceSinkOperator) { + current.setChildOperators(null); + } else { + operators.addAll(current.getChildOperators()); + } + } + work.replaceRoots(replacementMap); + } + + public void processFileSink(GenTezProcContext context, FileSinkOperator fileSink) + throws SemanticException { + + ParseContext parseContext = context.parseContext; + + boolean isInsertTable = // is INSERT OVERWRITE TABLE + GenMapRedUtils.isInsertInto(parseContext, fileSink); + HiveConf hconf = parseContext.getConf(); + + boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, + hconf, fileSink, context.currentTask, isInsertTable); + + Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, + chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask); + + if (chDir) { + // Merge the files in the destination table/partitions by creating Map-only merge job + // If underlying data is RCFile a RCFileBlockMerge task would be created. 
+ LOG.info("using CombineHiveInputformat for the merge job"); + GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, + context.dependencyTask, context.moveTask, + hconf, context.currentTask); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java index 475c940..a6c30a3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java @@ -19,15 +19,21 @@ package org.apache.hadoop.hive.ql.parse; import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -37,6 +43,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.UnionWork; import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType; /** @@ -106,6 +113,41 @@ public Object process(Node nd, Stack stack, context.rootToWorkMap.put(root, work); } + // This is where we cut the tree as described above. We also remember that + // we might have to connect parent work with this work later. + for (Operator parent: new ArrayList>(root.getParentOperators())) { + context.leafOperatorToFollowingWork.put(parent, work); + LOG.debug("Removing " + parent + " as parent from " + root); + root.removeParent(parent); + } + + if (!context.currentUnionOperators.isEmpty()) { + // if there are union all operators we need to add the work to the set + // of union operators. + + UnionWork unionWork; + if (context.unionWorkMap.containsKey(operator)) { + // we've seen this terminal before and have created a union work object. + // just need to add this work to it. There will be no children of this one + // since we've passed this operator before. + assert operator.getChildOperators().isEmpty(); + unionWork = (UnionWork) context.unionWorkMap.get(operator); + + } else { + // first time through. we need to create a union work object and add this + // work to it. Subsequent work should reference the union and not the actual + // work. 
+ unionWork = utils.createUnionWork(context, operator, tezWork); + } + + // finally hook everything up + tezWork.connect(unionWork, work, EdgeType.CONTAINS); + unionWork.addUnionOperators(context.currentUnionOperators); + context.currentUnionOperators.clear(); + context.workWithUnionOperators.add(work); + work = unionWork; + } + // We're scanning a tree from roots to leaf (this is not technically // correct, demux and mux operators might form a diamond shape, but // we will only scan one path and ignore the others, because the @@ -134,16 +176,10 @@ public Object process(Node nd, Stack stack, // remember the output name of the reduce sink rs.getConf().setOutputName(rWork.getName()); - // add dependency between the two work items - tezWork.connect(work, rWork, EdgeType.SIMPLE_EDGE); - } - - // This is where we cut the tree as described above. We also remember that - // we might have to connect parent work with this work later. - for (Operator parent: new ArrayList>(root.getParentOperators())) { - context.leafOperatorToFollowingWork.put(parent, work); - LOG.debug("Removing " + parent + " as parent from " + root); - root.removeParent(parent); + if (!context.unionWorkMap.containsKey(operator)) { + // add dependency between the two work items + tezWork.connect(work, rWork, EdgeType.SIMPLE_EDGE); + } } // No children means we're at the bottom. If there are more operators to scan @@ -182,7 +218,7 @@ public Object process(Node nd, Stack stack, for (BaseWork parentWork : linkWorkList) { tezWork.connect(parentWork, work, EdgeType.BROADCAST_EDGE); - // need to set up output name for reduce sink not that we know the name + // need to set up output name for reduce sink now that we know the name // of the downstream work for (ReduceSinkOperator r: context.linkWorkWithReduceSinkMap.get(parentWork)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index a01aa0e..c704b77 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -644,7 +644,9 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { } private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException { - String inputFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); + String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ? 
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT): + HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) { throw new SemanticException(generateErrorMessage((ASTNode) numerator, message + " sampling is not supported in " + inputFormat)); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index b7738c5..a5e6cbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -31,6 +31,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -53,16 +54,19 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin; import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc; import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; +import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; /** * TezCompiler translates the operator plan into TezTasks. @@ -75,6 +79,18 @@ public TezCompiler() { } @Override + public void init(HiveConf conf, LogHelper console, Hive db) { + super.init(conf, console, db); + + // Tez requires us to use RPC for the query plan + HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true); + + // We require the use of recursive input dirs for union processing + conf.setBoolean("mapred.input.dir.recursive", true); + HiveConf.setBoolVar(conf, ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true); + } + + @Override protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, Set outputs) throws SemanticException { @@ -138,14 +154,18 @@ protected void generateTaskTree(List> rootTasks, Pa TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(GenTezUtils.getUtils())); - opRules.put(new RuleRegExp("Bail on Union", + opRules.put(new RuleRegExp("Handle union", UnionOperator.getOperatorName() + "%"), new NodeProcessor() { @Override public Object process(Node n, Stack s, NodeProcessorCtx procCtx, Object... os) throws SemanticException { - throw new SemanticException("Unions not yet supported on Tez." - +" Please use MR for this query"); + GenTezProcContext context = (GenTezProcContext) procCtx; + UnionOperator union = (UnionOperator) n; + + // simply need to remember that we've seen a union. 
+ context.currentUnionOperators.add(union); + return null; } }); @@ -156,20 +176,31 @@ public Object process(Node n, Stack s, topNodes.addAll(pCtx.getTopOps().values()); GraphWalker ogw = new GenTezWorkWalker(disp, procCtx); ogw.startWalking(topNodes, null); + + // we need to clone some operator plans and remove union operators still + for (BaseWork w: procCtx.workWithUnionOperators) { + GenTezUtils.getUtils().removeUnionOperators(conf, procCtx, w); + } + + // finally make sure the file sink operators are set up right + for (FileSinkOperator fileSink: procCtx.fileSinkSet) { + GenTezUtils.getUtils().processFileSink(procCtx, fileSink); + } } @Override protected void setInputFormat(Task task) { if (task instanceof TezTask) { TezWork work = ((TezTask)task).getWork(); - Set roots = work.getRoots(); - for (BaseWork w: roots) { - assert w instanceof MapWork; - MapWork mapWork = (MapWork)w; - HashMap> opMap = mapWork.getAliasToWork(); - if (!opMap.isEmpty()) { - for (Operator op : opMap.values()) { - setInputFormat(mapWork, op); + List all = work.getAllWork(); + for (BaseWork w: all) { + if (w instanceof MapWork) { + MapWork mapWork = (MapWork) w; + HashMap> opMap = mapWork.getAliasToWork(); + if (!opMap.isEmpty()) { + for (Operator op : opMap.values()) { + setInputFormat(mapWork, op); + } } } } @@ -217,6 +248,9 @@ protected void optimizeTaskPlan(List> rootTasks, Pa if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { (new Vectorizer()).resolve(physicalCtx); } + if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) { + (new StageIDsRearranger()).resolve(physicalCtx); + } return; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index eb85446..38c4c11 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -22,6 +22,7 @@ import java.util.LinkedList; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.Stack; @@ -82,7 +83,9 @@ public void addDummyOp(HashTableDummyOperator dummyOp) { dummyOps.add(dummyOp); } - protected abstract Set> getAllRootOperators(); + public abstract void replaceRoots(Map, Operator> replacementMap); + + public abstract Set> getAllRootOperators(); public Set> getAllOperators() { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 19b553f..e1cc3f4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -305,6 +305,17 @@ public String getVectorModeOn() { } @Override + public void replaceRoots(Map, Operator> replacementMap) { + LinkedHashMap> newAliasToWork = new LinkedHashMap>(); + + for (Map.Entry> entry: aliasToWork.entrySet()) { + newAliasToWork.put(entry.getKey(), replacementMap.get(entry.getValue())); + } + + setAliasToWork(newAliasToWork); + } + + @Override @Explain(displayName = "Map Operator Tree") public Set> getAllRootOperators() { Set> opSet = new LinkedHashSet>(); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index afb3648..a68374e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -129,7 +129,13 @@ public void setTagToInput(final Map tagToInput) { } @Override - protected Set> 
getAllRootOperators() { + public void replaceRoots(Map, Operator> replacementMap) { + assert replacementMap.size() == 1; + setReducer(replacementMap.get(getReducer())); + } + + @Override + public Set> getAllRootOperators() { Set> opSet = new LinkedHashSet>(); opSet.add(getReducer()); return opSet; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java index 9112a77..f974c57 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java @@ -45,11 +45,14 @@ public enum EdgeType { SIMPLE_EDGE, - BROADCAST_EDGE + BROADCAST_EDGE, + CONTAINS } private static transient final Log LOG = LogFactory.getLog(TezWork.class); + private static int counter; + private final String name; private final Set roots = new HashSet(); private final Set leaves = new HashSet(); private final Map> workGraph = new HashMap>(); @@ -57,6 +60,15 @@ private final Map, EdgeType> edgeProperties = new HashMap, EdgeType>(); + public TezWork(String name) { + this.name = name + ":" + (++counter); + } + + @Explain(displayName = "DagName") + public String getName() { + return name; + } + /** * getWorkMap returns a map of "vertex name" to BaseWork */ diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java new file mode 100644 index 0000000..60781e6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.HashSet; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; + +/** + * Simple wrapper for union all cases. All contributing work for a union all + * is collected here. Downstream work will connect to the union not the individual + * work. 
+ */ +public class UnionWork extends BaseWork { + + private final Set unionOperators = new HashSet(); + + public UnionWork() { + super(); + } + + public UnionWork(String name) { + super(name); + } + + @Explain(displayName = "Vertex") + @Override + public String getName() { + return super.getName(); + } + + @Override + public void replaceRoots(Map, Operator> replacementMap) { + } + + @Override + public Set> getAllRootOperators() { + return new HashSet>(); + } + + public void addUnionOperators(Collection unions) { + unionOperators.addAll(unions); + } + + public Set getUnionOperators() { + return unionOperators; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java index 544fd82..859b5ad 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java @@ -112,7 +112,7 @@ public Edge answer(InvocationOnMock invocation) throws Throwable { } }); - work = new TezWork(); + work = new TezWork(""); mws = new MapWork[] { new MapWork(), new MapWork()}; rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() }; @@ -194,7 +194,7 @@ public void testBuildDag() throws IllegalArgumentException, IOException, Excepti @Test public void testEmptyWork() throws IllegalArgumentException, IOException, Exception { - DAG dag = task.build(conf, new TezWork(), path, appLr, new Context(conf)); + DAG dag = task.build(conf, new TezWork(""), path, appLr, new Context(conf)); assertEquals(dag.getVertices().size(), 0); } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 4d3013d..cc8e73a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -29,6 +29,8 @@ import java.nio.ByteBuffer; import java.sql.Timestamp; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -39,6 +41,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; + +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_ZEROCOPY; + import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -67,14 +72,19 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hive.common.util.HiveTestUtils; +import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; /** * Tests for the top level reader/streamFactory of ORC files. */ +@RunWith(value = Parameterized.class) public class TestOrcFile { public static class SimpleStruct { @@ -191,6 +201,16 @@ private static ByteBuffer byteBuf(int... items) { Configuration conf; FileSystem fs; Path testFilePath; + private final boolean zeroCopy; + + @Parameters + public static Collection data() { + return Arrays.asList(new Boolean[][] { {false}, {true}}); + } + + public TestOrcFile(Boolean zcr) { + zeroCopy = zcr.booleanValue(); + } @Rule public TestName testCaseName = new TestName(); @@ -198,6 +218,9 @@ private static ByteBuffer byteBuf(int... 
items) { @Before public void openFileSystem () throws Exception { conf = new Configuration(); + if(zeroCopy) { + conf.setBoolean(HIVE_ORC_ZEROCOPY.varname, zeroCopy); + } fs = FileSystem.getLocal(conf); testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); @@ -547,6 +570,7 @@ public void testStripeLevelStats() throws Exception { inspector = ObjectInspectorFactory.getReflectionObjectInspector (InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } + Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) @@ -572,35 +596,36 @@ public void testStripeLevelStats() throws Exception { StripeStatistics ss1 = metadata.getStripeStatistics().get(0); StripeStatistics ss2 = metadata.getStripeStatistics().get(1); StripeStatistics ss3 = metadata.getStripeStatistics().get(2); - assertEquals(4996, ss1.getColumnStatistics()[0].getNumberOfValues()); + + assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues()); assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues()); - assertEquals(1004, ss3.getColumnStatistics()[0].getNumberOfValues()); + assertEquals(1000, ss3.getColumnStatistics()[0].getNumberOfValues()); - assertEquals(4996, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getNumberOfValues()); + assertEquals(5000, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getNumberOfValues()); assertEquals(5000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getNumberOfValues()); - assertEquals(1004, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getNumberOfValues()); + assertEquals(1000, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getNumberOfValues()); assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMinimum()); - assertEquals(1, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum()); - assertEquals(2, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum()); + assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum()); + assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum()); assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMaximum()); assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMaximum()); assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMaximum()); - assertEquals(4996, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum()); - assertEquals(9996, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum()); - assertEquals(3008, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum()); + assertEquals(5000, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum()); + assertEquals(10000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum()); + assertEquals(3000, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum()); - assertEquals(4996, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getNumberOfValues()); + assertEquals(5000, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getNumberOfValues()); assertEquals(5000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getNumberOfValues()); - assertEquals(1004, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getNumberOfValues()); + assertEquals(1000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getNumberOfValues()); assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMinimum()); - 
assertEquals("one", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum()); + assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum()); assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMinimum()); assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMaximum()); assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMaximum()); - assertEquals("two", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum()); - assertEquals(14988, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum()); + assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum()); + assertEquals(15000, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum()); assertEquals(15000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getSum()); - assertEquals(5012, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum()); + assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum()); RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows(null); OrcProto.RowIndex[] index = recordReader.readRowIndex(0); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java index 251cd83..5581ad3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java @@ -262,7 +262,7 @@ public static void main(String[] args) throws Exception { bit.testCompressedSeek(); bit.testBiggerItems(); bit.testSkips(); - TestOrcFile test1 = new TestOrcFile(); + TestOrcFile test1 = new TestOrcFile(false); test1.test1(); test1.emptyFile(); test1.metaData(); diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java index a11d418..d57a64c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java +++ ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java @@ -34,7 +34,7 @@ @Before public void setup() throws Exception { nodes = new LinkedList(); - work = new TezWork(); + work = new TezWork(""); addWork(5); } diff --git ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java index d4e737f..c51ff09 100644 --- ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java +++ ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java @@ -20,20 +20,43 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import java.util.Arrays; +import java.util.Collection; + import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; /** * Test SessionState */ +@RunWith(value = Parameterized.class) public class TestSessionState { + private final boolean prewarm; + + public TestSessionState(Boolean mode) { + this.prewarm = mode.booleanValue(); + } + + @Parameters + public static Collection data() { + return Arrays.asList(new Boolean[][] { {false}, {true}}); + } @Before - public void setup(){ - SessionState.start(new HiveConf()); + public void setup() { + HiveConf conf = new HiveConf(); + if 
(prewarm) { + HiveConf.setBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED, true); + HiveConf.setIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS, 1); + } + SessionState.start(conf); } /** diff --git ql/src/test/resources/orc-file-dump-dictionary-threshold.out ql/src/test/resources/orc-file-dump-dictionary-threshold.out index 8687b26..0a06481 100644 --- ql/src/test/resources/orc-file-dump-dictionary-threshold.out +++ ql/src/test/resources/orc-file-dump-dictionary-threshold.out @@ -6,30 +6,30 @@ Type: struct Stripe Statistics: Stripe 1: - Column 0: count: 4000 - Column 1: count: 4000 min: -2147115959 max: 2145911404 sum: 71315665983 - Column 2: count: 4000 min: -9211329013123260308 max: 9217851628057711416 - Column 3: count: 4000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 245096 + Column 0: count: 5000 + Column 1: count: 5000 min: -2147115959 max: 2145911404 sum: 159677169195 + Column 2: count: 5000 min: -9216505819108477308 max: 9217851628057711416 + Column 3: count: 5000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 381254 Stripe 2: Column 0: count: 5000 - Column 1: count: 5000 min: -2147390285 max: 2146838901 sum: 107869424275 + Column 1: count: 5000 min: -2147390285 max: 2147224606 sum: -14961457759 Column 2: count: 5000 min: -9222178666167296739 max: 9221301751385928177 - Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 972748 + Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 1117994 Stripe 3: Column 0: count: 5000 - Column 1: count: 5000 min: -2145928262 max: 2147224606 sum: 38276585043 + Column 1: count: 5000 min: -2145842720 max: 2146718321 sum: 141092475520 Column 2: count: 5000 min: -9221963099397084326 max: 9222722740629726770 - Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 1753024 + Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 1925226 Stripe 4: Column 0: count: 5000 - Column 1: count: 5000 min: -2145378214 max: 2147453086 sum: -43469576640 + Column 1: count: 5000 min: -2145378214 max: 2147453086 sum: -153680004530 Column 2: count: 5000 min: -9222731174895935707 max: 9222919052987871506 - Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 2636664 + Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 2815002 Stripe 5: - Column 0: count: 2000 - Column 1: count: 2000 min: -2143595397 max: 2144595861 sum: -64863580335 - Column 2: count: 2000 min: -9212379634781416464 max: 9208134757538374043 - Column 3: count: 2000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 1302706 + Column 0: count: 1000 + Column 1: count: 1000 min: -2143595397 max: 2136858458 sum: -22999664100 + Column 2: count: 1000 min: -9212379634781416464 max: 9197412874152820822 + Column 3: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 File Statistics: Column 0: count: 21000 @@ -38,67 +38,67 @@ File Statistics: Column 3: count: 21000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 6910238 Stripes: - Stripe: offset: 3 data: 102311 rows: 4000 tail: 68 index: 224 + Stripe: offset: 3 data: 144733 rows: 5000 tail: 68 index: 235 Stream: column 0 section ROW_INDEX start: 3 length 10 Stream: column 1 section ROW_INDEX start: 13 length 36 Stream: column 2 section ROW_INDEX start: 49 length 39 - Stream: column 3 section ROW_INDEX start: 88 length 139 - Stream: column 1 section DATA start: 227 length 16022 - Stream: column 2 section DATA start: 16249 length 32028 - Stream: column 3 section DATA start: 48277 length 50887 - Stream: column 3 section LENGTH start: 99164 length 3374 + Stream: column 3 section ROW_INDEX start: 88 length 150 + Stream: column 1 section DATA start: 238 length 20029 + Stream: column 2 section DATA start: 20267 length 40035 + Stream: column 3 section DATA start: 60302 length 80382 + Stream: column 3 section LENGTH start: 140684 length 4287 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 - Stripe: offset: 102606 data: 284999 rows: 5000 tail: 68 index: 356 - Stream: column 0 section ROW_INDEX start: 102606 length 10 - Stream: column 1 section ROW_INDEX start: 102616 length 36 - Stream: column 2 section ROW_INDEX start: 102652 length 39 - Stream: column 3 section ROW_INDEX start: 102691 length 271 - Stream: column 1 section DATA start: 102962 length 20029 - Stream: column 2 section DATA start: 122991 length 40035 - Stream: column 3 section DATA start: 163026 length 219588 - Stream: column 3 section LENGTH start: 382614 length 5347 + Stripe: offset: 145039 data: 321684 rows: 5000 tail: 68 index: 415 + Stream: column 0 section ROW_INDEX start: 145039 length 10 + Stream: column 1 section ROW_INDEX start: 145049 length 35 + Stream: column 2 section ROW_INDEX start: 145084 length 39 + Stream: column 3 section ROW_INDEX start: 145123 length 331 + Stream: column 1 section DATA start: 145454 length 20029 + Stream: column 2 
section DATA start: 165483 length 40035 + Stream: column 3 section DATA start: 205518 length 256119 + Stream: column 3 section LENGTH start: 461637 length 5501 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 - Stripe: offset: 388029 data: 491655 rows: 5000 tail: 69 index: 544 - Stream: column 0 section ROW_INDEX start: 388029 length 10 - Stream: column 1 section ROW_INDEX start: 388039 length 36 - Stream: column 2 section ROW_INDEX start: 388075 length 39 - Stream: column 3 section ROW_INDEX start: 388114 length 459 - Stream: column 1 section DATA start: 388573 length 20029 - Stream: column 2 section DATA start: 408602 length 40035 - Stream: column 3 section DATA start: 448637 length 425862 - Stream: column 3 section LENGTH start: 874499 length 5729 + Stripe: offset: 467206 data: 531773 rows: 5000 tail: 69 index: 569 + Stream: column 0 section ROW_INDEX start: 467206 length 10 + Stream: column 1 section ROW_INDEX start: 467216 length 36 + Stream: column 2 section ROW_INDEX start: 467252 length 39 + Stream: column 3 section ROW_INDEX start: 467291 length 484 + Stream: column 1 section DATA start: 467775 length 20029 + Stream: column 2 section DATA start: 487804 length 40035 + Stream: column 3 section DATA start: 527839 length 466002 + Stream: column 3 section LENGTH start: 993841 length 5707 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 - Stripe: offset: 880297 data: 707368 rows: 5000 tail: 68 index: 691 - Stream: column 0 section ROW_INDEX start: 880297 length 10 - Stream: column 1 section ROW_INDEX start: 880307 length 36 - Stream: column 2 section ROW_INDEX start: 880343 length 39 - Stream: column 3 section ROW_INDEX start: 880382 length 606 - Stream: column 1 section DATA start: 880988 length 20029 - Stream: column 2 section DATA start: 901017 length 40035 - Stream: column 3 section DATA start: 941052 length 641580 - Stream: column 3 section LENGTH start: 1582632 length 5724 + Stripe: offset: 999617 data: 751374 rows: 5000 tail: 69 index: 734 + Stream: column 0 section ROW_INDEX start: 999617 length 10 + Stream: column 1 section ROW_INDEX start: 999627 length 36 + Stream: column 2 section ROW_INDEX start: 999663 length 39 + Stream: column 3 section ROW_INDEX start: 999702 length 649 + Stream: column 1 section DATA start: 1000351 length 20029 + Stream: column 2 section DATA start: 1020380 length 40035 + Stream: column 3 section DATA start: 1060415 length 685567 + Stream: column 3 section LENGTH start: 1745982 length 5743 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 Encoding column 3: DIRECT_V2 - Stripe: offset: 1588424 data: 348697 rows: 2000 tail: 67 index: 797 - Stream: column 0 section ROW_INDEX start: 1588424 length 10 - Stream: column 1 section ROW_INDEX start: 1588434 length 36 - Stream: column 2 section ROW_INDEX start: 1588470 length 39 - Stream: column 3 section ROW_INDEX start: 1588509 length 712 - Stream: column 1 section DATA start: 1589221 length 8011 - Stream: column 2 section DATA start: 1597232 length 16014 - Stream: column 3 section DATA start: 1613246 length 322259 - Stream: column 3 section LENGTH start: 1935505 length 2413 + Stripe: offset: 1800000 data: 177935 rows: 1000 tail: 67 index: 813 + Stream: column 0 section ROW_INDEX start: 1800000 length 10 + Stream: column 1 section ROW_INDEX start: 1800010 length 36 + Stream: column 2 section ROW_INDEX start: 1800046 length 39 + Stream: column 3 section 
ROW_INDEX start: 1800085 length 728 + Stream: column 1 section DATA start: 1800813 length 4007 + Stream: column 2 section DATA start: 1804820 length 8007 + Stream: column 3 section DATA start: 1812827 length 164661 + Stream: column 3 section LENGTH start: 1977488 length 1260 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 diff --git ql/src/test/results/clientpositive/tez/auto_join0.q.out ql/src/test/results/clientpositive/tez/auto_join0.q.out index 13abf58..31cf7f3 100644 --- ql/src/test/results/clientpositive/tez/auto_join0.q.out +++ ql/src/test/results/clientpositive/tez/auto_join0.q.out @@ -31,6 +31,7 @@ STAGE PLANS: Map 1 <- Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/auto_join1.q.out ql/src/test/results/clientpositive/tez/auto_join1.q.out index e591d6a..fdb4f4e 100644 --- ql/src/test/results/clientpositive/tez/auto_join1.q.out +++ ql/src/test/results/clientpositive/tez/auto_join1.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_j1 PREHOOK: query: explain FROM src src1 JOIN src src2 ON (src1.key = src2.key) @@ -13,20 +15,16 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -68,15 +66,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -93,40 +82,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/bucket2.q.out ql/src/test/results/clientpositive/tez/bucket2.q.out index 
74bad32..d36557d 100644 --- ql/src/test/results/clientpositive/tez/bucket2.q.out +++ ql/src/test/results/clientpositive/tez/bucket2.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket2_1 PREHOOK: query: explain extended insert overwrite table bucket2_1 @@ -39,6 +41,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -196,6 +199,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/bucket3.q.out ql/src/test/results/clientpositive/tez/bucket3.q.out index 69d838e..8f0d409 100644 --- ql/src/test/results/clientpositive/tez/bucket3.q.out +++ ql/src/test/results/clientpositive/tez/bucket3.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE bucket3_1(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE bucket3_1(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket3_1 PREHOOK: query: explain extended insert overwrite table bucket3_1 partition (ds='1') @@ -43,6 +45,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -221,6 +224,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/bucket4.q.out ql/src/test/results/clientpositive/tez/bucket4.q.out index 3092e21..8c90dc2 100644 --- ql/src/test/results/clientpositive/tez/bucket4.q.out +++ ql/src/test/results/clientpositive/tez/bucket4.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket4_1 PREHOOK: query: explain extended insert overwrite table bucket4_1 @@ -39,6 +41,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -197,6 +200,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/count.q.out ql/src/test/results/clientpositive/tez/count.q.out index 4f21fcd..9d9922d 100644 --- ql/src/test/results/clientpositive/tez/count.q.out +++ ql/src/test/results/clientpositive/tez/count.q.out @@ -1,13 +1,17 @@ PREHOOK: query: create table abcd (a int, b int, c int, d int) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table abcd (a int, b int, c int, d int) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@abcd PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/in4.txt' INTO TABLE abcd PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@abcd POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@abcd PREHOOK: query: select * from abcd PREHOOK: type: QUERY @@ -37,6 +41,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -108,6 +113,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -174,6 +180,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -239,6 +246,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out index 4340878..078ad7f 100644 --- ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out +++ ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out @@ -1,18 +1,24 @@ PREHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@src_rc_merge_test PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@src_rc_merge_test POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@src_rc_merge_test PREHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@tgt_rc_merge_test PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/cross_join.q.out ql/src/test/results/clientpositive/tez/cross_join.q.out index 4f5cea1..16b9671 100644 --- ql/src/test/results/clientpositive/tez/cross_join.q.out +++ ql/src/test/results/clientpositive/tez/cross_join.q.out @@ -13,6 +13,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -72,6 +73,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -131,6 +133,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/ctas.q.out ql/src/test/results/clientpositive/tez/ctas.q.out index 24c810c..aaac91c 100644 --- ql/src/test/results/clientpositive/tez/ctas.q.out +++ ql/src/test/results/clientpositive/tez/ctas.q.out @@ -2,10 +2,12 @@ PREHOOK: 
query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_Tmp PREHOOK: query: select * from nzhang_Tmp PREHOOK: type: QUERY @@ -21,15 +23,10 @@ POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from s POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-9 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -37,6 +34,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -80,19 +78,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_CTAS1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection - Stage: Stage-9 + Stage: Stage-4 Create Table Operator: Create Table columns: k string, value string @@ -109,40 +98,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_CTAS1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_CTAS1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -208,15 +163,10 @@ POSTHOOK: query: explain create table nzhang_ctas2 as select * from src sort by POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-9 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -224,6 +174,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) 
Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -267,19 +218,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_ctas2 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection - Stage: Stage-9 + Stage: Stage-4 Create Table Operator: Create Table columns: key string, value string @@ -296,40 +238,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas2 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas2 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -395,15 +303,10 @@ POSTHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache. POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-9 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -411,6 +314,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -454,19 +358,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.nzhang_ctas3 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection - Stage: Stage-9 + Stage: Stage-4 Create Table Operator: Create Table columns: half_key double, conb string @@ -484,18 +379,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-4 - Block level merge - - Stage: Stage-6 - Block level merge - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -624,15 +507,10 @@ POSTHOOK: query: explain create table nzhang_ctas4 row format delimited fields t POSTHOOK: type: 
CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-9 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -640,6 +518,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -683,19 +562,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_ctas4 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection - Stage: Stage-9 + Stage: Stage-4 Create Table Operator: Create Table columns: key string, value string @@ -713,40 +583,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas4 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas4 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -853,15 +689,10 @@ TOK_CREATETABLE STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-9 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -869,6 +700,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -979,19 +811,10 @@ STAGE PLANS: GatherStats: true MultiFileSpray: false - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection - Stage: Stage-9 + Stage: Stage-4 Create Table Operator: Create Table columns: key string, value string @@ -1012,140 +835,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - 
Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - field.delim , - line.delim - - name default.nzhang_ctas5 - serialization.format , - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas5 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - field.delim , - line.delim - - name default.nzhang_ctas5 - serialization.format , - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - field.delim , - line.delim - - name default.nzhang_ctas5 - serialization.format , - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas5 - name: default.nzhang_ctas5 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - field.delim , - line.delim - - name default.nzhang_ctas5 - serialization.format , - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas5 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - field.delim , - line.delim - - name default.nzhang_ctas5 - serialization.format , - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - field.delim , - line.delim - - name default.nzhang_ctas5 - serialization.format , - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_ctas5 - name: 
default.nzhang_ctas5 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -1155,8 +844,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@nzhang_ctas5 PREHOOK: query: create table nzhang_ctas6 (key string, `to` string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table nzhang_ctas6 (key string, `to` string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_ctas6 PREHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src tablesample (10 rows) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out index 8f54c96..9dc6480 100644 --- ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out +++ ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out @@ -2,10 +2,12 @@ PREHOOK: query: CREATE TABLE src1_rot13_iof(key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13InputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13OutputFormat' PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE src1_rot13_iof(key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13InputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13OutputFormat' POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@src1_rot13_iof PREHOOK: query: DESCRIBE EXTENDED src1_rot13_iof PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out index 74bad32..d36557d 100644 --- ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out +++ ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket2_1 PREHOOK: query: explain extended insert overwrite table bucket2_1 @@ -39,6 +41,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -196,6 +199,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/enforce_order.q.out ql/src/test/results/clientpositive/tez/enforce_order.q.out index e870837..c14d3c9 100644 --- ql/src/test/results/clientpositive/tez/enforce_order.q.out +++ ql/src/test/results/clientpositive/tez/enforce_order.q.out @@ -8,13 +8,17 @@ POSTHOOK: query: drop table table_desc POSTHOOK: type: DROPTABLE PREHOOK: query: create table table_asc(key string, value string) clustered by (key) sorted by (key ASC) into 1 BUCKETS PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table table_asc(key string, value string) clustered by (key) sorted by (key ASC) into 1 BUCKETS POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@table_asc PREHOOK: query: create table table_desc(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table table_desc(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@table_desc PREHOOK: query: insert overwrite table table_asc select key, value from src PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out index bf6032d..17139e9 100644 --- ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE filter_join_breaktask(key int, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE filter_join_breaktask(key int, value string) partitioned by (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@filter_join_breaktask PREHOOK: query: INSERT OVERWRITE TABLE filter_join_breaktask PARTITION(ds='2008-04-08') SELECT key, value from src1 @@ -139,6 +141,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out index 1583207..20b4589 100644 --- ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out +++ ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out @@ -1,24 +1,32 @@ PREHOOK: query: create table T1(c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string) partitioned by (ds string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table T1(c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string) partitioned by (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@T1 PREHOOK: query: create table T2(c1 string, c2 string, c3 string, c0 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string) partitioned by (ds string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table T2(c1 string, c2 string, c3 string, c0 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string) partitioned by (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@T2 PREHOOK: query: create table T3 (c0 bigint, c1 bigint, c2 int) partitioned by (ds string) 
PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table T3 (c0 bigint, c1 bigint, c2 int) partitioned by (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@T3 PREHOOK: query: create table T4 (c0 bigint, c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string, c26 string, c27 string, c28 string, c29 string, c30 string, c31 string, c32 string, c33 string, c34 string, c35 string, c36 string, c37 string, c38 string, c39 string, c40 string, c41 string, c42 string, c43 string, c44 string, c45 string, c46 string, c47 string, c48 string, c49 string, c50 string, c51 string, c52 string, c53 string, c54 string, c55 string, c56 string, c57 string, c58 string, c59 string, c60 string, c61 string, c62 string, c63 string, c64 string, c65 string, c66 string, c67 bigint, c68 string, c69 string, c70 bigint, c71 bigint, c72 bigint, c73 string, c74 string, c75 string, c76 string, c77 string, c78 string, c79 string, c80 string, c81 bigint, c82 bigint, c83 bigint) partitioned by (ds string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table T4 (c0 bigint, c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string, c26 string, c27 string, c28 string, c29 string, c30 string, c31 string, c32 string, c33 string, c34 string, c35 string, c36 string, c37 string, c38 string, c39 string, c40 string, c41 string, c42 string, c43 string, c44 string, c45 string, c46 string, c47 string, c48 string, c49 string, c50 string, c51 string, c52 string, c53 string, c54 string, c55 string, c56 string, c57 string, c58 string, c59 string, c60 string, c61 string, c62 string, c63 string, c64 string, c65 string, c66 string, c67 bigint, c68 string, c69 string, c70 bigint, c71 bigint, c72 bigint, c73 string, c74 string, c75 string, c76 string, c77 string, c78 string, c79 string, c80 string, c81 bigint, c82 bigint, c83 bigint) partitioned by (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@T4 PREHOOK: query: insert overwrite table T1 partition (ds='2010-04-17') select '5', '1', '1', '1', 0, 0,4 from src tablesample (1 rows) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/groupby1.q.out ql/src/test/results/clientpositive/tez/groupby1.q.out index 06d46c6..c215835 100644 --- ql/src/test/results/clientpositive/tez/groupby1.q.out +++ ql/src/test/results/clientpositive/tez/groupby1.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_g1 PREHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key @@ -11,14 +13,9 @@ FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root 
stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -26,6 +23,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -77,15 +75,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -102,40 +91,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/tez/groupby2.q.out ql/src/test/results/clientpositive/tez/groupby2.q.out index 0836a02..1cf946f 100644 --- ql/src/test/results/clientpositive/tez/groupby2.q.out +++ ql/src/test/results/clientpositive/tez/groupby2.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_g2 PREHOOK: query: EXPLAIN FROM src @@ -13,20 +15,16 @@ INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -63,15 +61,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -88,40 +77,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File 
Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/groupby3.q.out ql/src/test/results/clientpositive/tez/groupby3.q.out index c4bb7bb..cb8f0e7 100644 --- ql/src/test/results/clientpositive/tez/groupby3.q.out +++ ql/src/test/results/clientpositive/tez/groupby3.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 PREHOOK: query: EXPLAIN FROM src @@ -31,14 +33,9 @@ INSERT OVERWRITE TABLE dest1 SELECT POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -46,6 +43,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -92,15 +90,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -117,40 +106,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE 
TABLE dest1 SELECT sum(substr(src.value,5)), diff --git ql/src/test/results/clientpositive/tez/having.q.out ql/src/test/results/clientpositive/tez/having.q.out index 99e990d..25d337b 100644 --- ql/src/test/results/clientpositive/tez/having.q.out +++ ql/src/test/results/clientpositive/tez/having.q.out @@ -11,6 +11,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -91,6 +92,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -469,6 +471,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -738,6 +741,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -936,6 +940,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/insert1.q.out ql/src/test/results/clientpositive/tez/insert1.q.out index ef3e083..7b8819d 100644 --- ql/src/test/results/clientpositive/tez/insert1.q.out +++ ql/src/test/results/clientpositive/tez/insert1.q.out @@ -1,12 +1,16 @@ PREHOOK: query: create table insert1(key int, value string) stored as textfile PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table insert1(key int, value string) stored as textfile POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@insert1 PREHOOK: query: create table insert2(key int, value string) stored as textfile PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table insert2(key int, value string) stored as textfile POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@insert2 PREHOOK: query: insert overwrite table insert1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY @@ -26,18 +30,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -60,15 +60,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -85,40 +76,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - 
compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY POSTHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) @@ -127,18 +84,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -161,15 +114,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -186,40 +130,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: -- HIVE-3465 create database x PREHOOK: type: CREATEDATABASE @@ -230,8 +140,10 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: create table x.insert1(key int, value string) stored as textfile PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x POSTHOOK: query: create table x.insert1(key int, value string) stored as textfile POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x POSTHOOK: Output: x@insert1 POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] @@ -243,18 +155,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 
depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -277,15 +185,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -302,40 +201,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY POSTHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) @@ -344,18 +209,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -378,15 +239,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -403,40 +255,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: 
explain from insert2 insert into table insert1 select * where key < 10 @@ -451,25 +269,16 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-3 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-10 depends on stages: Stage-1 - Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -507,15 +316,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 - Stage: Stage-9 - Conditional Operator - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-3 Dependency Collection @@ -542,85 +342,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 - Stage: Stage-10 - Stats-Aggr Operator - Stage: Stage-5 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-7 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert1 - - Stage: Stage-8 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-11 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-13 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stats-Aggr Operator PREHOOK: query: -- HIVE-3676 CREATE DATABASE db2 @@ -638,8 +361,10 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: CREATE TABLE result(col1 STRING) PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:db2 POSTHOOK: query: CREATE TABLE result(col1 STRING) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:db2 POSTHOOK: Output: db2@result POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] @@ -698,8 +423,10 @@ POSTHOOK: Lineage: result.col1 SIMPLE [] POSTHOOK: Lineage: result.col1 SIMPLE [] PREHOOK: query: CREATE TABLE db1.result(col1 STRING) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:db1 POSTHOOK: query: CREATE TABLE db1.result(col1 STRING) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:db1 POSTHOOK: Output: db1@result POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/tez/insert_into1.q.out ql/src/test/results/clientpositive/tez/insert_into1.q.out index c5f50c1..1c6e992 100644 --- ql/src/test/results/clientpositive/tez/insert_into1.q.out +++ ql/src/test/results/clientpositive/tez/insert_into1.q.out @@ -4,8 +4,10 @@ POSTHOOK: query: DROP TABLE insert_into1 POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE insert_into1 (key int, value string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE insert_into1 (key int, value string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@insert_into1 PREHOOK: query: EXPLAIN INSERT INTO TABLE insert_into1 SELECT * from src LIMIT 100 PREHOOK: type: QUERY @@ -13,20 +15,16 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into1 SELECT * from src LIMIT POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -64,15 +62,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -89,40 +78,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * 
from src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -156,20 +111,16 @@ POSTHOOK: Lineage: insert_into1.key EXPRESSION [(src)src.FieldSchema(name:key, t POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -207,15 +158,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -232,40 +174,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -318,20 +226,16 @@ POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, t POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -369,15 +273,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -394,40 +289,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into1 - - Stage: Stage-6 - Tez - Vertices: - Merge 
- Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/tez/insert_into2.q.out ql/src/test/results/clientpositive/tez/insert_into2.q.out index 7e94cf3..5869b44 100644 --- ql/src/test/results/clientpositive/tez/insert_into2.q.out +++ ql/src/test/results/clientpositive/tez/insert_into2.q.out @@ -5,9 +5,11 @@ POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE insert_into2 (key int, value string) PARTITIONED BY (ds string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE insert_into2 (key int, value string) PARTITIONED BY (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@insert_into2 PREHOOK: query: EXPLAIN INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src LIMIT 100 @@ -17,20 +19,16 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into2 PARTITION (ds='1') POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -68,15 +66,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -95,40 +84,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into2 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into2 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -197,20 +152,16 @@ POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSc POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: 
Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -248,15 +199,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -275,40 +217,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into2 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into2 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY @@ -362,20 +270,16 @@ POSTHOOK: Lineage: insert_into2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSc POSTHOOK: Lineage: insert_into2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -413,15 +317,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -440,40 +335,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into2 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into2 - - Stage: Stage-7 - Move Operator - 
files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src LIMIT 50 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/join0.q.out ql/src/test/results/clientpositive/tez/join0.q.out index 8d0439d..e9fdced 100644 --- ql/src/test/results/clientpositive/tez/join0.q.out +++ ql/src/test/results/clientpositive/tez/join0.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -108,7 +109,7 @@ SELECT src1.key as k1, src1.value as v1, (SELECT * FROM src WHERE src.key < 10) src2 SORT BY k1, v1, k2, v2 POSTHOOK: type: QUERY -{"STAGE PLANS":{"Stage-1":{"Tez":{"Vertices:":{"Reducer 2":{"Reduce Operator Tree:":{"Join Operator":{"outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"Select Operator":{"expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"Reduce Output Operator":{"sort order:":"++++","value expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE","key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)"}},"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE","condition map:":[{"":"Inner Join 0 to 1"}],"condition expressions:":{"1":"{VALUE._col0} {VALUE._col1}","0":"{VALUE._col0} {VALUE._col1}"}}}},"Reducer 3":{"Reduce Operator Tree:":{"Extract":{"children":{"File Output Operator":{"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE"}}},"Map 1":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE"}}]},"Map 4":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 
Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE"}}]}},"Edges:":{"Reducer 2":[{"parent":"Map 1","type":"SIMPLE_EDGE"},{"parent":"Map 4","type":"SIMPLE_EDGE"}],"Reducer 3":{"parent":"Reducer 2","type":"SIMPLE_EDGE"}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}}} +#### A masked pattern was here #### PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 diff --git ql/src/test/results/clientpositive/tez/join1.q.out ql/src/test/results/clientpositive/tez/join1.q.out index 0a6690e..ca99d72 100644 --- ql/src/test/results/clientpositive/tez/join1.q.out +++ ql/src/test/results/clientpositive/tez/join1.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_j1 PREHOOK: query: EXPLAIN FROM src src1 JOIN src src2 ON (src1.key = src2.key) @@ -13,20 +15,16 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,15 +71,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -98,40 +87,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/leftsemijoin.q.out ql/src/test/results/clientpositive/tez/leftsemijoin.q.out index d8ecfbf..c23c537 100644 --- ql/src/test/results/clientpositive/tez/leftsemijoin.q.out +++ 
ql/src/test/results/clientpositive/tez/leftsemijoin.q.out @@ -9,35 +9,45 @@ POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE sales (name STRING, id INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE sales (name STRING, id INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@sales PREHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@things PREHOOK: query: load data local inpath '../../data/files/sales.txt' INTO TABLE sales PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@sales POSTHOOK: query: load data local inpath '../../data/files/sales.txt' INTO TABLE sales POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@sales PREHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23') PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@things POSTHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23') POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@things POSTHOOK: Output: default@things@ds=2011-10-23 PREHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24') PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@things POSTHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24') POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@things POSTHOOK: Output: default@things@ds=2011-10-24 PREHOOK: query: SELECT name,id FROM sales ORDER BY name ASC, id ASC diff --git ql/src/test/results/clientpositive/tez/limit_pushdown.q.out ql/src/test/results/clientpositive/tez/limit_pushdown.q.out index 279b918..7d6b1c7 100644 --- ql/src/test/results/clientpositive/tez/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/tez/limit_pushdown.q.out @@ -17,6 +17,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -95,6 +96,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -173,6 +175,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -268,6 +271,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -363,6 +367,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -453,6 +458,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -548,6 +554,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ 
-643,6 +650,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -703,6 +711,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -818,6 +827,7 @@ STAGE PLANS: Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -963,6 +973,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -1052,6 +1063,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -1212,6 +1224,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out index f68733d..d420caf 100644 --- ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out @@ -8,13 +8,17 @@ ds=2008-04-09/hr=11 ds=2008-04-09/hr=12 PREHOOK: query: create table if not exists nzhang_part1 like srcpart PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table if not exists nzhang_part1 like srcpart POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_part1 PREHOOK: query: create table if not exists nzhang_part2 like srcpart PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table if not exists nzhang_part2 like srcpart POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_part2 PREHOOK: query: describe extended nzhang_part1 PREHOOK: type: DESCTABLE @@ -44,25 +48,16 @@ insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, v POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-3 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 Stage-1 depends on stages: Stage-3 - Stage-10 depends on stages: Stage-1 - Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-4 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-2 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -100,126 +95,40 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 - Stage: Stage-9 - Conditional Operator - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-3 Dependency Collection - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: partition: - ds + ds 2008-12-31 hr replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 + name: default.nzhang_part2 Stage: Stage-4 Stats-Aggr Operator - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: partition: - ds 2008-12-31 + ds hr replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-10 - Stats-Aggr Operator + name: default.nzhang_part1 Stage: Stage-5 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 - - Stage: Stage-7 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 - - Stage: Stage-8 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-11 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stats-Aggr Operator PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out index 04445b5..34ca3c3 100644 --- ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out +++ ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out @@ -2,10 +2,12 @@ PREHOOK: query: create table if not exists nzhang_part_bucket (key string, value partitioned by (ds string, hr string) clustered by (key) into 10 buckets PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table if not exists nzhang_part_bucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_part_bucket PREHOOK: query: describe extended nzhang_part_bucket PREHOOK: type: DESCTABLE @@ -40,6 +42,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out 
ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out index 7958f77..f992f55 100644 --- ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out @@ -8,8 +8,10 @@ ds=2008-04-09/hr=11 ds=2008-04-09/hr=12 PREHOOK: query: create table if not exists nzhang_part3 like srcpart PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table if not exists nzhang_part3 like srcpart POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_part3 PREHOOK: query: describe extended nzhang_part3 PREHOOK: type: DESCTABLE @@ -42,6 +44,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out index 6b693f4..bc2c650 100644 --- ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out @@ -15,6 +15,7 @@ STAGE PLANS: Tez Edges: Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -93,6 +94,7 @@ STAGE PLANS: Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/mapreduce1.q.out ql/src/test/results/clientpositive/tez/mapreduce1.q.out index 05154eb..0a6142e 100644 --- ql/src/test/results/clientpositive/tez/mapreduce1.q.out +++ ql/src/test/results/clientpositive/tez/mapreduce1.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 PREHOOK: query: EXPLAIN FROM src @@ -21,20 +23,16 @@ SORT BY ten, one POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -75,15 +73,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -100,40 +89,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git ql/src/test/results/clientpositive/tez/mapreduce2.q.out ql/src/test/results/clientpositive/tez/mapreduce2.q.out index 8cbf576..284b4dc 100644 --- ql/src/test/results/clientpositive/tez/mapreduce2.q.out +++ ql/src/test/results/clientpositive/tez/mapreduce2.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 PREHOOK: query: EXPLAIN FROM src @@ -19,20 +21,16 @@ DISTRIBUTE BY tvalue, tkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -72,15 +70,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -97,40 +86,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git ql/src/test/results/clientpositive/tez/merge1.q.out ql/src/test/results/clientpositive/tez/merge1.q.out index 9d17aae..ec46417 100644 --- ql/src/test/results/clientpositive/tez/merge1.q.out +++ ql/src/test/results/clientpositive/tez/merge1.q.out @@ -1,7 +1,9 @@ PREHOOK: query: create table dest1(key int, val int) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table dest1(key int, val int) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 PREHOOK: query: explain insert overwrite table dest1 @@ 
-13,20 +15,16 @@ select key, count(1) from src group by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -70,15 +68,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -95,40 +84,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: insert overwrite table dest1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -472,15 +427,19 @@ POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: dest1.val EXPRESSION [(src)src.null, ] PREHOOK: query: create table test_src(key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table test_src(key string, value string) partitioned by (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@test_src POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.val EXPRESSION [(src)src.null, ] PREHOOK: query: create table dest1(key string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table dest1(key string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.val EXPRESSION [(src)src.null, ] @@ -524,18 +483,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked 
pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -555,15 +510,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -580,40 +526,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src @@ -648,18 +560,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -679,15 +587,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -704,40 +603,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src diff --git ql/src/test/results/clientpositive/tez/merge2.q.out ql/src/test/results/clientpositive/tez/merge2.q.out index a4f5c03..3a1f1af 100644 --- ql/src/test/results/clientpositive/tez/merge2.q.out +++ 
ql/src/test/results/clientpositive/tez/merge2.q.out @@ -1,7 +1,9 @@ PREHOOK: query: create table test1(key int, val int) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table test1(key int, val int) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@test1 PREHOOK: query: explain insert overwrite table test1 @@ -13,20 +15,16 @@ select key, count(1) from src group by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -70,15 +68,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -95,40 +84,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -472,15 +427,19 @@ POSTHOOK: Lineage: test1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: test1.val EXPRESSION [(src)src.null, ] PREHOOK: query: create table test_src(key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table test_src(key string, value string) partitioned by (ds string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@test_src POSTHOOK: Lineage: test1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test1.val EXPRESSION [(src)src.null, ] PREHOOK: query: create table test1(key string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table test1(key string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@test1 POSTHOOK: Lineage: test1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test1.val EXPRESSION [(src)src.null, ] @@ -524,18 +483,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE 
DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -555,15 +510,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -580,40 +526,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src @@ -648,18 +560,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -679,15 +587,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -704,40 +603,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern 
was here #### - PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src diff --git ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out index 883e71d..4c3e95c 100644 --- ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out @@ -13,6 +13,7 @@ PREHOOK: query: create table over10k( row format delimited fields terminated by '|' PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table over10k( t tinyint, si smallint, @@ -28,12 +29,15 @@ POSTHOOK: query: create table over10k( row format delimited fields terminated by '|' POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@over10k PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@over10k POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@over10k PREHOOK: query: create table stats_tbl( t tinyint, @@ -48,6 +52,7 @@ PREHOOK: query: create table stats_tbl( dec decimal, bin binary) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table stats_tbl( t tinyint, si smallint, @@ -61,6 +66,7 @@ POSTHOOK: query: create table stats_tbl( dec decimal, bin binary) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@stats_tbl PREHOOK: query: create table stats_tbl_part( t tinyint, @@ -75,6 +81,7 @@ PREHOOK: query: create table stats_tbl_part( dec decimal, bin binary) partitioned by (dt string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: create table stats_tbl_part( t tinyint, si smallint, @@ -88,6 +95,7 @@ POSTHOOK: query: create table stats_tbl_part( dec decimal, bin binary) partitioned by (dt string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@stats_tbl_part PREHOOK: query: insert overwrite table stats_tbl select * from over10k PREHOOK: type: QUERY @@ -290,6 +298,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -391,6 +400,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -922,6 +932,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/mrr.q.out ql/src/test/results/clientpositive/tez/mrr.q.out index 3956446..8ec77eb 100644 --- ql/src/test/results/clientpositive/tez/mrr.q.out +++ ql/src/test/results/clientpositive/tez/mrr.q.out @@ -14,6 +14,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -403,6 +404,7 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -818,6 +820,7 @@ STAGE PLANS: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A 
masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -1262,6 +1265,7 @@ STAGE PLANS: Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -1641,6 +1645,7 @@ STAGE PLANS: Edges: Map 1 <- Reducer 3 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/ptf.q.out ql/src/test/results/clientpositive/tez/ptf.q.out index 1932b93..3bd2db2 100644 --- ql/src/test/results/clientpositive/tez/ptf.q.out +++ ql/src/test/results/clientpositive/tez/ptf.q.out @@ -15,6 +15,7 @@ CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- data setup CREATE TABLE part( p_partkey INT, @@ -28,12 +29,15 @@ CREATE TABLE part( p_comment STRING ) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@part PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@part POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@part PREHOOK: query: --1. test1 select p_mfgr, p_name, p_size, @@ -821,6 +825,7 @@ r INT, dr INT, s DOUBLE) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- 17. testMultipleInserts2SWQsWithPTF CREATE TABLE part_4( p_mfgr STRING, @@ -830,6 +835,7 @@ r INT, dr INT, s DOUBLE) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@part_4 PREHOOK: query: CREATE TABLE part_5( p_mfgr STRING, @@ -841,6 +847,7 @@ dr INT, cud DOUBLE, fv1 INT) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE part_5( p_mfgr STRING, p_name STRING, @@ -851,6 +858,7 @@ dr INT, cud DOUBLE, fv1 INT) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 PREHOOK: query: from noop(on part partition by p_mfgr diff --git ql/src/test/results/clientpositive/tez/sample1.q.out ql/src/test/results/clientpositive/tez/sample1.q.out index 92d031f..1b5cd68 100644 --- ql/src/test/results/clientpositive/tez/sample1.q.out +++ ql/src/test/results/clientpositive/tez/sample1.q.out @@ -1,7 +1,9 @@ PREHOOK: query: CREATE TABLE dest1(key INT, value STRING, dt STRING, hr STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING, dt STRING, hr STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 PREHOOK: query: -- no input pruning, no sample filter EXPLAIN EXTENDED @@ -56,18 +58,14 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -155,15 +153,6 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] - Stage: Stage-8 - 
Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -192,146 +181,6 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,dt,hr - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value, string dt, string hr} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,dt,hr - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value, string dt, string hr} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,dt,hr - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value, string dt, string hr} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - name: default.dest1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,dt,hr - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value, string dt, string hr} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,dt,hr - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value, string dt, string hr} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,dt,hr - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value, string dt, string hr} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - name: default.dest1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s WHERE s.ds='2008-04-08' and s.hr='11' diff --git ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out index 62daf36..b41b357 100644 --- ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out +++ ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out @@ -2,23 +2,29 @@ PREHOOK: query: -- partitioned table analyze create table dummy (key string, value string) partitioned by (ds string, hr string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- partitioned table analyze create table dummy (key string, value string) partitioned by (ds string, hr string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dummy PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@dummy POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@dummy POSTHOOK: Output: default@dummy@ds=2008/hr=12 PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@dummy POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@dummy POSTHOOK: Output: default@dummy@ds=2008/hr=11 PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics @@ -129,10 +135,12 @@ PREHOOK: query: -- static partitioned table on insert create table dummy (key string, value string) partitioned by (ds string, hr string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- static partitioned table on insert create table dummy (key string, value string) partitioned by (ds string, hr string) POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dummy PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src PREHOOK: type: QUERY @@ -260,10 +268,12 @@ PREHOOK: query: -- dynamic partitioned table on insert create table dummy (key int) partitioned by (hr int) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- dynamic partitioned table on insert create table dummy (key int) partitioned by (hr int) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@dummy POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -271,8 +281,10 @@ POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -280,9 +292,11 @@ POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@tbl POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@tbl POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out index 0fb2ec6..9cbd7f7 100644 --- ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out +++ ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out @@ -3,11 +3,13 @@ PREHOOK: query: -- test analyze table ... compute statistics noscan -- 1. test full spec create table analyze_srcpart like srcpart PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- test analyze table ... compute statistics noscan -- 1. test full spec create table analyze_srcpart like srcpart POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@analyze_srcpart PREHOOK: query: insert overwrite table analyze_srcpart partition (ds, hr) select * from srcpart where ds is not null PREHOOK: type: QUERY @@ -353,9 +355,11 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [ PREHOOK: query: -- 2. 
test partial spec create table analyze_srcpart_partial like srcpart PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- 2. test partial spec create table analyze_srcpart_partial like srcpart POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@analyze_srcpart_partial POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/tez/subquery_exists.q.out ql/src/test/results/clientpositive/tez/subquery_exists.q.out index e7d6e39..9d7fa28 100644 --- ql/src/test/results/clientpositive/tez/subquery_exists.q.out +++ ql/src/test/results/clientpositive/tez/subquery_exists.q.out @@ -27,6 +27,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/subquery_in.q.out ql/src/test/results/clientpositive/tez/subquery_in.q.out index 49787b6..c66602b 100644 --- ql/src/test/results/clientpositive/tez/subquery_in.q.out +++ ql/src/test/results/clientpositive/tez/subquery_in.q.out @@ -15,6 +15,7 @@ CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- data setup CREATE TABLE part( p_partkey INT, @@ -28,12 +29,15 @@ CREATE TABLE part( p_comment STRING ) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@part PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@part POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@part PREHOOK: query: DROP TABLE lineitem PREHOOK: type: DROPTABLE @@ -58,6 +62,7 @@ PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, @@ -77,12 +82,15 @@ POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@lineitem PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem PREHOOK: type: LOAD +#### A masked pattern was here #### PREHOOK: Output: default@lineitem POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem POSTHOOK: type: LOAD +#### A masked pattern was here #### POSTHOOK: Output: default@lineitem PREHOOK: query: -- non agg, non corr explain @@ -105,6 +113,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -220,6 +229,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -345,6 +355,7 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Map 5 
(SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -491,6 +502,7 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -644,6 +656,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -806,6 +819,7 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/tez_dml.q.out ql/src/test/results/clientpositive/tez/tez_dml.q.out index 9e9ef33..7c653d8 100644 --- ql/src/test/results/clientpositive/tez/tez_dml.q.out +++ ql/src/test/results/clientpositive/tez/tez_dml.q.out @@ -6,15 +6,10 @@ EXPLAIN CREATE TABLE tmp_src AS SELECT * FROM (SELECT value, count(value) AS cnt POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-9 depends on stages: Stage-2, Stage-0 - Stage-3 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -22,6 +17,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -74,19 +70,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_src - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection - Stage: Stage-9 + Stage: Stage-4 Create Table Operator: Create Table columns: value string, cnt bigint @@ -103,40 +90,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmp_src - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmp_src - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: CREATE TABLE tmp_src AS SELECT * FROM (SELECT value, count(value) AS cnt FROM src GROUP BY value) f1 ORDER BY cnt PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src @@ -464,9 +417,11 @@ val_348 5 PREHOOK: query: -- dyn partitions CREATE TABLE tmp_src_part (c string) PARTITIONED BY (d int) PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default POSTHOOK: query: -- dyn partitions CREATE TABLE tmp_src_part (c string) PARTITIONED BY (d int) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@tmp_src_part PREHOOK: query: EXPLAIN INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src PREHOOK: type: QUERY @@ -474,18 +429,14 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * F POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -505,15 +456,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_src_part - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-2 Dependency Collection @@ -532,40 +474,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmp_src_part - - Stage: Stage-6 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmp_src_part - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src PREHOOK: type: QUERY PREHOOK: Input: default@tmp_src @@ -918,9 +826,11 @@ val_348 5 PREHOOK: query: -- multi insert CREATE TABLE even (c int, d string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: -- multi insert CREATE TABLE even (c int, d string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@even POSTHOOK: Lineage: tmp_src_part PARTITION(d=1).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tmp_src_part PARTITION(d=2).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ] @@ -929,8 +839,10 @@ POSTHOOK: Lineage: tmp_src_part PARTITION(d=4).c SIMPLE [(tmp_src)tmp_src.FieldS POSTHOOK: Lineage: tmp_src_part PARTITION(d=5).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: CREATE TABLE odd (c int, d string) PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE odd (c int, d string) POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default POSTHOOK: Output: default@odd POSTHOOK: Lineage: tmp_src_part PARTITION(d=1).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tmp_src_part PARTITION(d=2).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, 
comment:null), ] @@ -954,25 +866,16 @@ POSTHOOK: Lineage: tmp_src_part PARTITION(d=4).c SIMPLE [(tmp_src)tmp_src.FieldS POSTHOOK: Lineage: tmp_src_part PARTITION(d=5).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-3 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-10 depends on stages: Stage-1 - Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Tez +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -1010,15 +913,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.odd - Stage: Stage-9 - Conditional Operator - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-3 Dependency Collection @@ -1045,85 +939,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.odd - Stage: Stage-10 - Stats-Aggr Operator - Stage: Stage-5 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.even - - Stage: Stage-7 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.even - - Stage: Stage-8 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-11 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.odd - - Stage: Stage-13 - Tez - Vertices: - Merge - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.odd - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stats-Aggr Operator PREHOOK: query: FROM src INSERT INTO TABLE even SELECT key, value WHERE key % 2 = 0 diff --git ql/src/test/results/clientpositive/tez/tez_join_tests.q.out ql/src/test/results/clientpositive/tez/tez_join_tests.q.out index 0117860..63c8ec0 100644 --- ql/src/test/results/clientpositive/tez/tez_join_tests.q.out +++ 
ql/src/test/results/clientpositive/tez/tez_join_tests.q.out @@ -16,6 +16,7 @@ STAGE PLANS: Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out index 6c0c121..6f00d74 100644 --- ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out +++ ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out @@ -16,6 +16,7 @@ STAGE PLANS: Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: diff --git ql/src/test/results/clientpositive/tez/union2.q.out ql/src/test/results/clientpositive/tez/union2.q.out new file mode 100644 index 0000000..1b1b7d5 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union2.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain + select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2) unionsrc +PREHOOK: type: QUERY +POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain + select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2) unionsrc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2) 
unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1000 diff --git ql/src/test/results/clientpositive/tez/union3.q.out ql/src/test/results/clientpositive/tez/union3.q.out new file mode 100644 index 0000000..c14fbd8 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union3.q.out @@ -0,0 +1,276 @@ +PREHOOK: query: explain +SELECT * +FROM ( + SELECT 1 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 2 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 3 AS id + FROM (SELECT * FROM src LIMIT 1) s2 + UNION ALL + SELECT 4 AS id + FROM (SELECT * FROM src LIMIT 1) s2 +) a +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * +FROM ( + SELECT 1 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 2 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 3 AS id + FROM (SELECT * FROM src LIMIT 1) s2 + UNION ALL + SELECT 4 AS id + FROM (SELECT * FROM src LIMIT 1) s2 +) a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data 
size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 10 + Reduce Operator Tree: + Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 11 + Reduce Operator Tree: + Extract + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 2 + Reduce Operator Tree: + Extract + Limit + Number of rows: 1 + Select Operator + expressions: 4 (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Extract + Limit + Number of rows: 1 + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 8 + Reduce Operator Tree: + Extract + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: CREATE TABLE union_out (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE union_out (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_out +PREHOOK: query: insert overwrite table union_out +SELECT * +FROM ( + SELECT 1 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 2 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 3 AS id + FROM (SELECT * FROM src LIMIT 1) s2 + UNION ALL + SELECT 4 AS id + FROM (SELECT * FROM src LIMIT 1) s2 +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@union_out +POSTHOOK: query: insert overwrite table union_out +SELECT * +FROM ( + SELECT 1 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 2 AS id + FROM (SELECT * FROM src LIMIT 1) s1 + CLUSTER BY id + UNION ALL + SELECT 3 AS id + FROM (SELECT * FROM src LIMIT 1) s2 + UNION ALL + SELECT 4 AS id + FROM (SELECT * FROM src LIMIT 1) s2 +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@union_out +POSTHOOK: Lineage: union_out.id EXPRESSION [] +PREHOOK: query: select * from union_out cluster by id +PREHOOK: type: QUERY +PREHOOK: Input: default@union_out +#### A masked pattern was here #### +POSTHOOK: query: select * from union_out cluster by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_out +#### A masked pattern was here #### +POSTHOOK: Lineage: union_out.id EXPRESSION [] +1 +2 +3 +4 diff --git ql/src/test/results/clientpositive/tez/union4.q.out ql/src/test/results/clientpositive/tez/union4.q.out new file mode 100644 index 0000000..9e74952 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union4.q.out @@ -0,0 +1,156 @@ +PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by filesink + + +create table tmptable(key string, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by filesink + + +create table tmptable(key string, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmptable +PREHOOK: query: explain +insert overwrite table tmptable + select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table tmptable + select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: 
COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: 'tst1' (type: string), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: 'tst2' (type: string), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + Union 3 + Vertex: Union 3 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table tmptable +select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmptable +POSTHOOK: query: insert overwrite table tmptable +select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmptable +POSTHOOK: Lineage: tmptable.key EXPRESSION [] +POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, ] +PREHOOK: query: select * from tmptable x sort by x.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@tmptable +#### A masked pattern was here #### +POSTHOOK: query: select * from tmptable x sort by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmptable +#### A masked pattern was here #### +POSTHOOK: Lineage: tmptable.key EXPRESSION [] +POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, ] +tst1 500 +tst2 500 diff --git ql/src/test/results/clientpositive/tez/union5.q.out ql/src/test/results/clientpositive/tez/union5.q.out new file mode 100644 index 0000000..efc4552 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union5.q.out @@ -0,0 +1,143 @@ +PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain + select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key +PREHOOK: type: QUERY +POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain + select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: 'tst1' (type: string), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: 'tst2' (type: string), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 + UNION ALL + select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +tst1 1 +tst2 1 diff --git ql/src/test/results/clientpositive/tez/union6.q.out ql/src/test/results/clientpositive/tez/union6.q.out new file mode 100644 index 0000000..c945105 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union6.q.out @@ -0,0 +1,164 @@ +PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink + + +create table tmptable(key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink + + +create table tmptable(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmptable +PREHOOK: query: explain +insert overwrite table tmptable + select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table tmptable + select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 4 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 
Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + Union 3 + Vertex: Union 3 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table tmptable +select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@tmptable +POSTHOOK: query: insert overwrite table tmptable +select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@tmptable +POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from tmptable x sort by x.key, x.value +PREHOOK: type: QUERY +PREHOOK: Input: default@tmptable +#### A masked pattern was here #### +POSTHOOK: query: select * from tmptable x sort by x.key, x.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmptable +#### A masked pattern was here #### +POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ] + + + + + val_165 + val_193 + val_265 + val_27 + val_409 + val_484 +128 +146 val_146 +150 val_150 +213 val_213 +224 +238 val_238 +255 val_255 +273 val_273 +278 val_278 +311 val_311 +369 +401 val_401 +406 val_406 +66 val_66 +98 val_98 +tst1 500 diff --git ql/src/test/results/clientpositive/tez/union7.q.out ql/src/test/results/clientpositive/tez/union7.q.out new file mode 100644 index 0000000..58cbcd5 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union7.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain + select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key +PREHOOK: type: QUERY +POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain + select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 5 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION ALL + select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + 10 +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +tst1 1 diff --git ql/src/test/results/clientpositive/tez/union8.q.out ql/src/test/results/clientpositive/tez/union8.q.out new file mode 100644 index 0000000..cf48953 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union8.q.out @@ -0,0 +1,1594 @@ +PREHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by filesink + +explain + select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +PREHOOK: type: QUERY +POSTHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by filesink + +explain + select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 3 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 3 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + 
outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 4 + Map Operator Tree: + TableScan + alias: s3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +265 val_265 +193 val_193 +401 val_401 +150 val_150 +273 val_273 +224 val_224 +369 val_369 +66 val_66 +128 val_128 +213 val_213 +146 val_146 +406 val_406 +429 val_429 +374 val_374 +152 val_152 +469 val_469 +145 val_145 +495 val_495 +37 val_37 +327 val_327 +281 val_281 +277 val_277 +209 val_209 +15 val_15 +82 val_82 +403 val_403 +166 val_166 +417 val_417 +430 val_430 +252 val_252 +292 val_292 +219 val_219 +287 val_287 +153 val_153 +193 val_193 +338 val_338 +446 val_446 +459 val_459 +394 val_394 +237 val_237 +482 val_482 +174 val_174 +413 val_413 +494 val_494 +207 val_207 +199 val_199 +466 val_466 +208 val_208 +174 val_174 +399 val_399 +396 val_396 +247 val_247 +417 val_417 +489 val_489 +162 val_162 +377 val_377 +397 val_397 +309 val_309 +365 val_365 +266 val_266 +439 val_439 +342 val_342 +367 val_367 +325 val_325 +167 val_167 +195 val_195 +475 val_475 +17 val_17 +113 val_113 +155 val_155 +203 val_203 +339 val_339 +0 val_0 +455 val_455 +128 val_128 +311 val_311 +316 val_316 +57 val_57 +302 val_302 +205 val_205 +149 val_149 +438 val_438 +345 val_345 +129 val_129 +170 val_170 +20 val_20 +489 val_489 +157 val_157 +378 val_378 +221 val_221 +92 val_92 +111 val_111 +47 val_47 +72 val_72 +4 val_4 +280 val_280 +35 val_35 +427 val_427 +277 val_277 +208 val_208 +356 val_356 +399 val_399 +169 val_169 +382 val_382 +498 val_498 +125 val_125 +386 val_386 +437 val_437 +469 val_469 +192 val_192 +286 val_286 +187 val_187 +176 val_176 +54 val_54 +459 val_459 +51 val_51 +138 val_138 +103 val_103 +239 val_239 +213 val_213 +216 val_216 +430 val_430 +278 val_278 +176 val_176 +289 val_289 +221 val_221 +65 val_65 +318 val_318 +332 val_332 +311 val_311 +275 val_275 +137 val_137 +241 val_241 +83 val_83 +333 val_333 +180 val_180 +284 val_284 +12 val_12 +230 val_230 +181 val_181 +67 val_67 +260 val_260 +404 val_404 +384 val_384 +489 val_489 +353 val_353 +373 val_373 +272 val_272 
+138 val_138 +217 val_217 +84 val_84 +348 val_348 +466 val_466 +58 val_58 +8 val_8 +411 val_411 +230 val_230 +208 val_208 +348 val_348 +24 val_24 +463 val_463 +431 val_431 +179 val_179 +172 val_172 +42 val_42 +129 val_129 +158 val_158 +119 val_119 +496 val_496 +0 val_0 +322 val_322 +197 val_197 +468 val_468 +393 val_393 +454 val_454 +100 val_100 +298 val_298 +199 val_199 +191 val_191 +418 val_418 +96 val_96 +26 val_26 +165 val_165 +327 val_327 +230 val_230 +205 val_205 +120 val_120 +131 val_131 +51 val_51 +404 val_404 +43 val_43 +436 val_436 +156 val_156 +469 val_469 +468 val_468 +308 val_308 +95 val_95 +196 val_196 +288 val_288 +481 val_481 +457 val_457 +98 val_98 +282 val_282 +197 val_197 +187 val_187 +318 val_318 +318 val_318 +409 val_409 +470 val_470 +137 val_137 +369 val_369 +316 val_316 +169 val_169 +413 val_413 +85 val_85 +77 val_77 +0 val_0 +490 val_490 +87 val_87 +364 val_364 +179 val_179 +118 val_118 +134 val_134 +395 val_395 +282 val_282 +138 val_138 +238 val_238 +419 val_419 +15 val_15 +118 val_118 +72 val_72 +90 val_90 +307 val_307 +19 val_19 +435 val_435 +10 val_10 +277 val_277 +273 val_273 +306 val_306 +224 val_224 +309 val_309 +389 val_389 +327 val_327 +242 val_242 +369 val_369 +392 val_392 +272 val_272 +331 val_331 +401 val_401 +242 val_242 +452 val_452 +177 val_177 +226 val_226 +5 val_5 +497 val_497 +402 val_402 +396 val_396 +317 val_317 +395 val_395 +58 val_58 +35 val_35 +336 val_336 +95 val_95 +11 val_11 +168 val_168 +34 val_34 +229 val_229 +233 val_233 +143 val_143 +472 val_472 +322 val_322 +498 val_498 +160 val_160 +195 val_195 +42 val_42 +321 val_321 +430 val_430 +119 val_119 +489 val_489 +458 val_458 +78 val_78 +76 val_76 +41 val_41 +223 val_223 +492 val_492 +149 val_149 +449 val_449 +218 val_218 +228 val_228 +138 val_138 +453 val_453 +30 val_30 +209 val_209 +64 val_64 +468 val_468 +76 val_76 +74 val_74 +342 val_342 +69 val_69 +230 val_230 +33 val_33 +368 val_368 +103 val_103 +296 val_296 +113 val_113 +216 val_216 +367 val_367 +344 val_344 +167 val_167 +274 val_274 +219 val_219 +239 val_239 +485 val_485 +116 val_116 +223 val_223 +256 val_256 +263 val_263 +70 val_70 +487 val_487 +480 val_480 +401 val_401 +288 val_288 +191 val_191 +5 val_5 +244 val_244 +438 val_438 +128 val_128 +467 val_467 +432 val_432 +202 val_202 +316 val_316 +229 val_229 +469 val_469 +463 val_463 +280 val_280 +2 val_2 +35 val_35 +283 val_283 +331 val_331 +235 val_235 +80 val_80 +44 val_44 +193 val_193 +321 val_321 +335 val_335 +104 val_104 +466 val_466 +366 val_366 +175 val_175 +403 val_403 +483 val_483 +53 val_53 +105 val_105 +257 val_257 +406 val_406 +409 val_409 +190 val_190 +406 val_406 +401 val_401 +114 val_114 +258 val_258 +90 val_90 +203 val_203 +262 val_262 +348 val_348 +424 val_424 +12 val_12 +396 val_396 +201 val_201 +217 val_217 +164 val_164 +431 val_431 +454 val_454 +478 val_478 +298 val_298 +125 val_125 +431 val_431 +164 val_164 +424 val_424 +187 val_187 +382 val_382 +5 val_5 +70 val_70 +397 val_397 +480 val_480 +291 val_291 +24 val_24 +351 val_351 +255 val_255 +104 val_104 +70 val_70 +163 val_163 +438 val_438 +119 val_119 +414 val_414 +200 val_200 +491 val_491 +237 val_237 +439 val_439 +360 val_360 +248 val_248 +479 val_479 +305 val_305 +417 val_417 +199 val_199 +444 val_444 +120 val_120 +429 val_429 +169 val_169 +443 val_443 +323 val_323 +325 val_325 +277 val_277 +230 val_230 +478 val_478 +178 val_178 +468 val_468 +310 val_310 +317 val_317 +333 val_333 +493 val_493 +460 val_460 +207 val_207 +249 val_249 +265 val_265 +480 val_480 +83 val_83 +136 val_136 +353 val_353 +172 val_172 +214 
val_214 +462 val_462 +233 val_233 +406 val_406 +133 val_133 +175 val_175 +189 val_189 +454 val_454 +375 val_375 +401 val_401 +421 val_421 +407 val_407 +384 val_384 +256 val_256 +26 val_26 +134 val_134 +67 val_67 +384 val_384 +379 val_379 +18 val_18 +462 val_462 +492 val_492 +100 val_100 +298 val_298 +9 val_9 +341 val_341 +498 val_498 +146 val_146 +458 val_458 +362 val_362 +186 val_186 +285 val_285 +348 val_348 +167 val_167 +18 val_18 +273 val_273 +183 val_183 +281 val_281 +344 val_344 +97 val_97 +469 val_469 +315 val_315 +84 val_84 +28 val_28 +37 val_37 +448 val_448 +152 val_152 +348 val_348 +307 val_307 +194 val_194 +414 val_414 +477 val_477 +222 val_222 +126 val_126 +90 val_90 +169 val_169 +403 val_403 +400 val_400 +200 val_200 +97 val_97 +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +265 val_265 +193 val_193 +401 val_401 +150 val_150 +273 val_273 +224 val_224 +369 val_369 +66 val_66 +128 val_128 +213 val_213 +146 val_146 +406 val_406 +429 val_429 +374 val_374 +152 val_152 +469 val_469 +145 val_145 +495 val_495 +37 val_37 +327 val_327 +281 val_281 +277 val_277 +209 val_209 +15 val_15 +82 val_82 +403 val_403 +166 val_166 +417 val_417 +430 val_430 +252 val_252 +292 val_292 +219 val_219 +287 val_287 +153 val_153 +193 val_193 +338 val_338 +446 val_446 +459 val_459 +394 val_394 +237 val_237 +482 val_482 +174 val_174 +413 val_413 +494 val_494 +207 val_207 +199 val_199 +466 val_466 +208 val_208 +174 val_174 +399 val_399 +396 val_396 +247 val_247 +417 val_417 +489 val_489 +162 val_162 +377 val_377 +397 val_397 +309 val_309 +365 val_365 +266 val_266 +439 val_439 +342 val_342 +367 val_367 +325 val_325 +167 val_167 +195 val_195 +475 val_475 +17 val_17 +113 val_113 +155 val_155 +203 val_203 +339 val_339 +0 val_0 +455 val_455 +128 val_128 +311 val_311 +316 val_316 +57 val_57 +302 val_302 +205 val_205 +149 val_149 +438 val_438 +345 val_345 +129 val_129 +170 val_170 +20 val_20 +489 val_489 +157 val_157 +378 val_378 +221 val_221 +92 val_92 +111 val_111 +47 val_47 +72 val_72 +4 val_4 +280 val_280 +35 val_35 +427 val_427 +277 val_277 +208 val_208 +356 val_356 +399 val_399 +169 val_169 +382 val_382 +498 val_498 +125 val_125 +386 val_386 +437 val_437 +469 val_469 +192 val_192 +286 val_286 +187 val_187 +176 val_176 +54 val_54 +459 val_459 +51 val_51 +138 val_138 +103 val_103 +239 val_239 +213 val_213 +216 val_216 +430 val_430 +278 val_278 +176 val_176 +289 val_289 +221 val_221 +65 val_65 +318 val_318 +332 val_332 +311 val_311 +275 val_275 +137 val_137 +241 val_241 +83 val_83 +333 val_333 +180 val_180 +284 val_284 +12 val_12 +230 val_230 +181 val_181 +67 val_67 +260 val_260 +404 val_404 +384 val_384 +489 val_489 +353 val_353 +373 val_373 +272 val_272 +138 val_138 +217 val_217 +84 val_84 +348 val_348 +466 val_466 +58 val_58 +8 val_8 +411 val_411 +230 val_230 +208 val_208 +348 val_348 +24 val_24 +463 val_463 +431 val_431 +179 val_179 +172 val_172 +42 val_42 +129 val_129 +158 val_158 +119 val_119 +496 val_496 +0 val_0 +322 val_322 +197 val_197 +468 val_468 +393 val_393 +454 val_454 +100 val_100 +298 val_298 +199 val_199 +191 val_191 +418 val_418 +96 val_96 +26 val_26 +165 val_165 +327 val_327 +230 val_230 +205 val_205 +120 val_120 +131 val_131 +51 val_51 +404 val_404 +43 val_43 +436 val_436 +156 val_156 +469 val_469 +468 val_468 +308 val_308 +95 val_95 +196 val_196 +288 val_288 +481 val_481 +457 val_457 +98 val_98 +282 val_282 +197 val_197 +187 val_187 +318 val_318 +318 val_318 +409 val_409 +470 val_470 +137 val_137 +369 val_369 +316 
val_316 +169 val_169 +413 val_413 +85 val_85 +77 val_77 +0 val_0 +490 val_490 +87 val_87 +364 val_364 +179 val_179 +118 val_118 +134 val_134 +395 val_395 +282 val_282 +138 val_138 +238 val_238 +419 val_419 +15 val_15 +118 val_118 +72 val_72 +90 val_90 +307 val_307 +19 val_19 +435 val_435 +10 val_10 +277 val_277 +273 val_273 +306 val_306 +224 val_224 +309 val_309 +389 val_389 +327 val_327 +242 val_242 +369 val_369 +392 val_392 +272 val_272 +331 val_331 +401 val_401 +242 val_242 +452 val_452 +177 val_177 +226 val_226 +5 val_5 +497 val_497 +402 val_402 +396 val_396 +317 val_317 +395 val_395 +58 val_58 +35 val_35 +336 val_336 +95 val_95 +11 val_11 +168 val_168 +34 val_34 +229 val_229 +233 val_233 +143 val_143 +472 val_472 +322 val_322 +498 val_498 +160 val_160 +195 val_195 +42 val_42 +321 val_321 +430 val_430 +119 val_119 +489 val_489 +458 val_458 +78 val_78 +76 val_76 +41 val_41 +223 val_223 +492 val_492 +149 val_149 +449 val_449 +218 val_218 +228 val_228 +138 val_138 +453 val_453 +30 val_30 +209 val_209 +64 val_64 +468 val_468 +76 val_76 +74 val_74 +342 val_342 +69 val_69 +230 val_230 +33 val_33 +368 val_368 +103 val_103 +296 val_296 +113 val_113 +216 val_216 +367 val_367 +344 val_344 +167 val_167 +274 val_274 +219 val_219 +239 val_239 +485 val_485 +116 val_116 +223 val_223 +256 val_256 +263 val_263 +70 val_70 +487 val_487 +480 val_480 +401 val_401 +288 val_288 +191 val_191 +5 val_5 +244 val_244 +438 val_438 +128 val_128 +467 val_467 +432 val_432 +202 val_202 +316 val_316 +229 val_229 +469 val_469 +463 val_463 +280 val_280 +2 val_2 +35 val_35 +283 val_283 +331 val_331 +235 val_235 +80 val_80 +44 val_44 +193 val_193 +321 val_321 +335 val_335 +104 val_104 +466 val_466 +366 val_366 +175 val_175 +403 val_403 +483 val_483 +53 val_53 +105 val_105 +257 val_257 +406 val_406 +409 val_409 +190 val_190 +406 val_406 +401 val_401 +114 val_114 +258 val_258 +90 val_90 +203 val_203 +262 val_262 +348 val_348 +424 val_424 +12 val_12 +396 val_396 +201 val_201 +217 val_217 +164 val_164 +431 val_431 +454 val_454 +478 val_478 +298 val_298 +125 val_125 +431 val_431 +164 val_164 +424 val_424 +187 val_187 +382 val_382 +5 val_5 +70 val_70 +397 val_397 +480 val_480 +291 val_291 +24 val_24 +351 val_351 +255 val_255 +104 val_104 +70 val_70 +163 val_163 +438 val_438 +119 val_119 +414 val_414 +200 val_200 +491 val_491 +237 val_237 +439 val_439 +360 val_360 +248 val_248 +479 val_479 +305 val_305 +417 val_417 +199 val_199 +444 val_444 +120 val_120 +429 val_429 +169 val_169 +443 val_443 +323 val_323 +325 val_325 +277 val_277 +230 val_230 +478 val_478 +178 val_178 +468 val_468 +310 val_310 +317 val_317 +333 val_333 +493 val_493 +460 val_460 +207 val_207 +249 val_249 +265 val_265 +480 val_480 +83 val_83 +136 val_136 +353 val_353 +172 val_172 +214 val_214 +462 val_462 +233 val_233 +406 val_406 +133 val_133 +175 val_175 +189 val_189 +454 val_454 +375 val_375 +401 val_401 +421 val_421 +407 val_407 +384 val_384 +256 val_256 +26 val_26 +134 val_134 +67 val_67 +384 val_384 +379 val_379 +18 val_18 +462 val_462 +492 val_492 +100 val_100 +298 val_298 +9 val_9 +341 val_341 +498 val_498 +146 val_146 +458 val_458 +362 val_362 +186 val_186 +285 val_285 +348 val_348 +167 val_167 +18 val_18 +273 val_273 +183 val_183 +281 val_281 +344 val_344 +97 val_97 +469 val_469 +315 val_315 +84 val_84 +28 val_28 +37 val_37 +448 val_448 +152 val_152 +348 val_348 +307 val_307 +194 val_194 +414 val_414 +477 val_477 +222 val_222 +126 val_126 +90 val_90 +169 val_169 +403 val_403 +400 val_400 +200 val_200 +97 val_97 +238 val_238 +86 val_86 +311 val_311 +27 
val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +265 val_265 +193 val_193 +401 val_401 +150 val_150 +273 val_273 +224 val_224 +369 val_369 +66 val_66 +128 val_128 +213 val_213 +146 val_146 +406 val_406 +429 val_429 +374 val_374 +152 val_152 +469 val_469 +145 val_145 +495 val_495 +37 val_37 +327 val_327 +281 val_281 +277 val_277 +209 val_209 +15 val_15 +82 val_82 +403 val_403 +166 val_166 +417 val_417 +430 val_430 +252 val_252 +292 val_292 +219 val_219 +287 val_287 +153 val_153 +193 val_193 +338 val_338 +446 val_446 +459 val_459 +394 val_394 +237 val_237 +482 val_482 +174 val_174 +413 val_413 +494 val_494 +207 val_207 +199 val_199 +466 val_466 +208 val_208 +174 val_174 +399 val_399 +396 val_396 +247 val_247 +417 val_417 +489 val_489 +162 val_162 +377 val_377 +397 val_397 +309 val_309 +365 val_365 +266 val_266 +439 val_439 +342 val_342 +367 val_367 +325 val_325 +167 val_167 +195 val_195 +475 val_475 +17 val_17 +113 val_113 +155 val_155 +203 val_203 +339 val_339 +0 val_0 +455 val_455 +128 val_128 +311 val_311 +316 val_316 +57 val_57 +302 val_302 +205 val_205 +149 val_149 +438 val_438 +345 val_345 +129 val_129 +170 val_170 +20 val_20 +489 val_489 +157 val_157 +378 val_378 +221 val_221 +92 val_92 +111 val_111 +47 val_47 +72 val_72 +4 val_4 +280 val_280 +35 val_35 +427 val_427 +277 val_277 +208 val_208 +356 val_356 +399 val_399 +169 val_169 +382 val_382 +498 val_498 +125 val_125 +386 val_386 +437 val_437 +469 val_469 +192 val_192 +286 val_286 +187 val_187 +176 val_176 +54 val_54 +459 val_459 +51 val_51 +138 val_138 +103 val_103 +239 val_239 +213 val_213 +216 val_216 +430 val_430 +278 val_278 +176 val_176 +289 val_289 +221 val_221 +65 val_65 +318 val_318 +332 val_332 +311 val_311 +275 val_275 +137 val_137 +241 val_241 +83 val_83 +333 val_333 +180 val_180 +284 val_284 +12 val_12 +230 val_230 +181 val_181 +67 val_67 +260 val_260 +404 val_404 +384 val_384 +489 val_489 +353 val_353 +373 val_373 +272 val_272 +138 val_138 +217 val_217 +84 val_84 +348 val_348 +466 val_466 +58 val_58 +8 val_8 +411 val_411 +230 val_230 +208 val_208 +348 val_348 +24 val_24 +463 val_463 +431 val_431 +179 val_179 +172 val_172 +42 val_42 +129 val_129 +158 val_158 +119 val_119 +496 val_496 +0 val_0 +322 val_322 +197 val_197 +468 val_468 +393 val_393 +454 val_454 +100 val_100 +298 val_298 +199 val_199 +191 val_191 +418 val_418 +96 val_96 +26 val_26 +165 val_165 +327 val_327 +230 val_230 +205 val_205 +120 val_120 +131 val_131 +51 val_51 +404 val_404 +43 val_43 +436 val_436 +156 val_156 +469 val_469 +468 val_468 +308 val_308 +95 val_95 +196 val_196 +288 val_288 +481 val_481 +457 val_457 +98 val_98 +282 val_282 +197 val_197 +187 val_187 +318 val_318 +318 val_318 +409 val_409 +470 val_470 +137 val_137 +369 val_369 +316 val_316 +169 val_169 +413 val_413 +85 val_85 +77 val_77 +0 val_0 +490 val_490 +87 val_87 +364 val_364 +179 val_179 +118 val_118 +134 val_134 +395 val_395 +282 val_282 +138 val_138 +238 val_238 +419 val_419 +15 val_15 +118 val_118 +72 val_72 +90 val_90 +307 val_307 +19 val_19 +435 val_435 +10 val_10 +277 val_277 +273 val_273 +306 val_306 +224 val_224 +309 val_309 +389 val_389 +327 val_327 +242 val_242 +369 val_369 +392 val_392 +272 val_272 +331 val_331 +401 val_401 +242 val_242 +452 val_452 +177 val_177 +226 val_226 +5 val_5 +497 val_497 +402 val_402 +396 val_396 +317 val_317 +395 val_395 +58 val_58 +35 val_35 +336 val_336 +95 val_95 +11 val_11 +168 val_168 +34 val_34 +229 val_229 +233 val_233 +143 val_143 +472 val_472 +322 val_322 +498 val_498 +160 val_160 +195 val_195 +42 val_42 
+321 val_321 +430 val_430 +119 val_119 +489 val_489 +458 val_458 +78 val_78 +76 val_76 +41 val_41 +223 val_223 +492 val_492 +149 val_149 +449 val_449 +218 val_218 +228 val_228 +138 val_138 +453 val_453 +30 val_30 +209 val_209 +64 val_64 +468 val_468 +76 val_76 +74 val_74 +342 val_342 +69 val_69 +230 val_230 +33 val_33 +368 val_368 +103 val_103 +296 val_296 +113 val_113 +216 val_216 +367 val_367 +344 val_344 +167 val_167 +274 val_274 +219 val_219 +239 val_239 +485 val_485 +116 val_116 +223 val_223 +256 val_256 +263 val_263 +70 val_70 +487 val_487 +480 val_480 +401 val_401 +288 val_288 +191 val_191 +5 val_5 +244 val_244 +438 val_438 +128 val_128 +467 val_467 +432 val_432 +202 val_202 +316 val_316 +229 val_229 +469 val_469 +463 val_463 +280 val_280 +2 val_2 +35 val_35 +283 val_283 +331 val_331 +235 val_235 +80 val_80 +44 val_44 +193 val_193 +321 val_321 +335 val_335 +104 val_104 +466 val_466 +366 val_366 +175 val_175 +403 val_403 +483 val_483 +53 val_53 +105 val_105 +257 val_257 +406 val_406 +409 val_409 +190 val_190 +406 val_406 +401 val_401 +114 val_114 +258 val_258 +90 val_90 +203 val_203 +262 val_262 +348 val_348 +424 val_424 +12 val_12 +396 val_396 +201 val_201 +217 val_217 +164 val_164 +431 val_431 +454 val_454 +478 val_478 +298 val_298 +125 val_125 +431 val_431 +164 val_164 +424 val_424 +187 val_187 +382 val_382 +5 val_5 +70 val_70 +397 val_397 +480 val_480 +291 val_291 +24 val_24 +351 val_351 +255 val_255 +104 val_104 +70 val_70 +163 val_163 +438 val_438 +119 val_119 +414 val_414 +200 val_200 +491 val_491 +237 val_237 +439 val_439 +360 val_360 +248 val_248 +479 val_479 +305 val_305 +417 val_417 +199 val_199 +444 val_444 +120 val_120 +429 val_429 +169 val_169 +443 val_443 +323 val_323 +325 val_325 +277 val_277 +230 val_230 +478 val_478 +178 val_178 +468 val_468 +310 val_310 +317 val_317 +333 val_333 +493 val_493 +460 val_460 +207 val_207 +249 val_249 +265 val_265 +480 val_480 +83 val_83 +136 val_136 +353 val_353 +172 val_172 +214 val_214 +462 val_462 +233 val_233 +406 val_406 +133 val_133 +175 val_175 +189 val_189 +454 val_454 +375 val_375 +401 val_401 +421 val_421 +407 val_407 +384 val_384 +256 val_256 +26 val_26 +134 val_134 +67 val_67 +384 val_384 +379 val_379 +18 val_18 +462 val_462 +492 val_492 +100 val_100 +298 val_298 +9 val_9 +341 val_341 +498 val_498 +146 val_146 +458 val_458 +362 val_362 +186 val_186 +285 val_285 +348 val_348 +167 val_167 +18 val_18 +273 val_273 +183 val_183 +281 val_281 +344 val_344 +97 val_97 +469 val_469 +315 val_315 +84 val_84 +28 val_28 +37 val_37 +448 val_448 +152 val_152 +348 val_348 +307 val_307 +194 val_194 +414 val_414 +477 val_477 +222 val_222 +126 val_126 +90 val_90 +169 val_169 +403 val_403 +400 val_400 +200 val_200 +97 val_97 diff --git ql/src/test/results/clientpositive/tez/union9.q.out ql/src/test/results/clientpositive/tez/union9.q.out new file mode 100644 index 0000000..ad23db3 --- /dev/null +++ ql/src/test/results/clientpositive/tez/union9.q.out @@ -0,0 +1,111 @@ +PREHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by reducesink + +explain + select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +PREHOOK: type: QUERY +POSTHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by reducesink + +explain + select count(1) FROM (select s1.key as key, s1.value as 
value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: s3 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL + select s2.key as key, s2.value as value from src s2 UNION ALL + select s3.key as key, s3.value as value from src s3) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1500 diff --git shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java index 4f5d4fa..51c8051 100644 --- shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java +++ shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java @@ -42,6 +42,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -50,8 
+51,10 @@ import org.apache.hadoop.fs.Trash; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; +import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputFormat; @@ -773,6 +776,19 @@ public FileSystem createProxyFileSystem(FileSystem fs, URI uri) { ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); return ret; } + + @Override + public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException { + /* not supported */ + return null; + } + + @Override + public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) { + /* not supported */ + return null; + } + @Override public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { return context.getConfiguration(); diff --git shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java index be57716..e4e56b7 100644 --- shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java +++ shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -37,7 +38,9 @@ import org.apache.hadoop.fs.ProxyFileSystem; import org.apache.hadoop.fs.Trash; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.mapred.JobTracker; import org.apache.hadoop.mapred.MiniMRCluster; import org.apache.hadoop.mapred.ClusterStatus; @@ -410,6 +413,18 @@ public FileSystem createProxyFileSystem(FileSystem fs, URI uri) { ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); return ret; } + + @Override + public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException { + /* not supported */ + return null; + } + + @Override + public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) { + /* not supported */ + return null; + } @Override public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { diff --git shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 3d778df..5df5ed5 100644 --- shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -27,11 +27,13 @@ import java.util.Map; import java.util.HashMap; import java.net.URI; +import java.nio.ByteBuffer; import java.io.FileNotFoundException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FSDataInputStream; import 
org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; @@ -41,7 +43,12 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.Trash; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim; +import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType; +import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; +import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MiniMRCluster; @@ -71,6 +78,19 @@ HadoopShims.MiniDFSShim cluster = null; + final boolean zeroCopy; + + public Hadoop23Shims() { + boolean zcr = false; + try { + Class.forName("org.apache.hadoop.fs.CacheFlag", false, + ShimLoader.class.getClassLoader()); + zcr = true; + } catch (ClassNotFoundException ce) { + } + this.zeroCopy = zcr; + } + @Override public String getTaskAttemptLogUrl(JobConf conf, String taskTrackerHttpAddress, String taskAttemptId) @@ -556,6 +576,24 @@ public FileSystem createProxyFileSystem(FileSystem fs, URI uri) { ret.put("MAPREDSETUPCLEANUPNEEDED", "mapreduce.job.committer.setup.cleanup.needed"); ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); return ret; + } + + @Override + public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException { + if(zeroCopy) { + return ZeroCopyShims.getZeroCopyReader(in, pool); + } + /* not supported */ + return null; + } + + @Override + public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) { + if(zeroCopy) { + return ZeroCopyShims.getDirectDecompressor(codec); + } + /* not supported */ + return null; } @Override diff --git shims/0.23/src/main/java/org/apache/hadoop/hive/shims/ZeroCopyShims.java shims/0.23/src/main/java/org/apache/hadoop/hive/shims/ZeroCopyShims.java new file mode 100644 index 0000000..8de08ad --- /dev/null +++ shims/0.23/src/main/java/org/apache/hadoop/hive/shims/ZeroCopyShims.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.shims; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.EnumSet; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.ReadOption; +import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType; +import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; +import org.apache.hadoop.io.ByteBufferPool; +import org.apache.hadoop.io.compress.DirectDecompressor; +import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor; +import org.apache.hadoop.io.compress.zlib.ZlibDecompressor.CompressionHeader; +import org.apache.hadoop.io.compress.zlib.ZlibDecompressor.ZlibDirectDecompressor; + +import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim; +import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim; + +class ZeroCopyShims { + private static final class ByteBufferPoolAdapter implements ByteBufferPool { + private ByteBufferPoolShim pool; + + public ByteBufferPoolAdapter(ByteBufferPoolShim pool) { + this.pool = pool; + } + + @Override + public final ByteBuffer getBuffer(boolean direct, int length) { + return this.pool.getBuffer(direct, length); + } + + @Override + public final void putBuffer(ByteBuffer buffer) { + this.pool.putBuffer(buffer); + } + } + + private static final class ZeroCopyAdapter implements ZeroCopyReaderShim { + private final FSDataInputStream in; + private final ByteBufferPoolAdapter pool; + private final static EnumSet CHECK_SUM = EnumSet + .noneOf(ReadOption.class); + private final static EnumSet NO_CHECK_SUM = EnumSet + .of(ReadOption.SKIP_CHECKSUMS); + + public ZeroCopyAdapter(FSDataInputStream in, ByteBufferPoolShim poolshim) { + this.in = in; + if (poolshim != null) { + pool = new ByteBufferPoolAdapter(poolshim); + } else { + pool = null; + } + } + + public final ByteBuffer readBuffer(int maxLength, boolean verifyChecksums) + throws IOException { + EnumSet options = NO_CHECK_SUM; + if (verifyChecksums) { + options = CHECK_SUM; + } + return this.in.read(this.pool, maxLength, options); + } + + public final void releaseBuffer(ByteBuffer buffer) { + this.in.releaseBuffer(buffer); + } + } + + public static ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, + ByteBufferPoolShim pool) throws IOException { + return new ZeroCopyAdapter(in, pool); + } + + private static final class DirectDecompressorAdapter implements + DirectDecompressorShim { + private final DirectDecompressor decompressor; + + public DirectDecompressorAdapter(DirectDecompressor decompressor) { + this.decompressor = decompressor; + } + + public void decompress(ByteBuffer src, ByteBuffer dst) throws IOException { + this.decompressor.decompress(src, dst); + } + } + + public static DirectDecompressorShim getDirectDecompressor( + DirectCompressionType codec) { + DirectDecompressor decompressor = null; + switch (codec) { + case ZLIB: { + decompressor = new ZlibDirectDecompressor(); + } + break; + case ZLIB_NOHEADER: { + decompressor = new ZlibDirectDecompressor(CompressionHeader.NO_HEADER, 0); + } + break; + case SNAPPY: { + decompressor = new SnappyDirectDecompressor(); + } + break; + } + if (decompressor != null) { + return new DirectDecompressorAdapter(decompressor); + } + /* not supported */ + return null; + } +} diff --git shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index 9e9a60d..e15ab4e 100644 --- 
shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
@@ -24,6 +24,7 @@
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
 import java.security.PrivilegedExceptionAction;
 import java.util.Comparator;
 import java.util.Iterator;
@@ -36,11 +37,13 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.mapred.ClusterStatus;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -521,6 +524,69 @@ RecordReader getRecordReader(JobConf job, InputSplitShim split, Reporter reporte
   public Map<String, String> getHadoopConfNames();

+  /**
+   * A hadoop.io ByteBufferPool shim.
+   */
+  public interface ByteBufferPoolShim {
+    /**
+     * Get a new ByteBuffer from the pool. The pool can provide this by
+     * removing a buffer from its internal cache, or by allocating a
+     * new buffer.
+     *
+     * @param direct Whether the buffer should be direct.
+     * @param length The minimum length the buffer will have.
+     * @return A new ByteBuffer. Its capacity can be less
+     *         than what was requested, but must be at
+     *         least 1 byte.
+     */
+    ByteBuffer getBuffer(boolean direct, int length);
+
+    /**
+     * Release a buffer back to the pool.
+     * The pool may choose to put this buffer into its cache or free it.
+     *
+     * @param buffer a direct bytebuffer
+     */
+    void putBuffer(ByteBuffer buffer);
+  }
+
+  /**
+   * Provides an HDFS ZeroCopyReader shim.
+   * @param in FSDataInputStream to read from (where the cached/mmap buffers are tied to)
+   * @param pool ByteBufferPoolShim to allocate fallback buffers with
+   *
+   * @return null if not supported
+   */
+  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException;
+
+  public interface ZeroCopyReaderShim {
+    /**
+     * Get a ByteBuffer from the FSDataInputStream - this can be either a HeapByteBuffer or a MappedByteBuffer.
+     * Also moves the in stream by that amount. The data read can be smaller than maxLength.
+     *
+     * @return ByteBuffer read from the stream.
+     */
+    public ByteBuffer readBuffer(int maxLength, boolean verifyChecksums) throws IOException;
+    /**
+     * Release a ByteBuffer obtained from a previous call to readBuffer().
+     */
+    public void releaseBuffer(ByteBuffer buffer);
+  }
+
+  public enum DirectCompressionType {
+    NONE,
+    ZLIB_NOHEADER,
+    ZLIB,
+    SNAPPY,
+  };
+
+  public interface DirectDecompressorShim {
+    public void decompress(ByteBuffer src, ByteBuffer dst) throws IOException;
+  }
+
+  public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec);
   /**
    * Get configuration from JobContext
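
For reference, below is a minimal, hypothetical sketch of how a caller (for example, an ORC reader running with hive.exec.orc.zerocopy enabled) could consume the shim API introduced above. Only ShimLoader.getHadoopShims(), getZeroCopyReader(), ByteBufferPoolShim, and ZeroCopyReaderShim come from this patch; the ZeroCopyReadExample class, its no-op buffer pool, and the 256 KB read size are illustrative assumptions, not part of the change. On the 0.20 and 0.20S shims (or a Hadoop 2 build without org.apache.hadoop.fs.CacheFlag) getZeroCopyReader() returns null, so callers must keep a conventional read path as a fallback.

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim;
import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
import org.apache.hadoop.hive.shims.ShimLoader;

public class ZeroCopyReadExample {                      // hypothetical class, not part of the patch

  // Trivial fallback pool: always allocates a fresh buffer and never caches returned ones.
  private static final ByteBufferPoolShim POOL = new ByteBufferPoolShim() {
    @Override
    public ByteBuffer getBuffer(boolean direct, int length) {
      return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);
    }
    @Override
    public void putBuffer(ByteBuffer buffer) {
      // no caching in this sketch; the buffer is simply dropped for the GC
    }
  };

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FSDataInputStream in = FileSystem.get(conf).open(new Path(args[0]));

    HadoopShims shims = ShimLoader.getHadoopShims();
    ZeroCopyReaderShim zcr = shims.getZeroCopyReader(in, POOL);
    if (zcr == null) {
      // Shim does not support zero-copy (Hadoop 0.20/0.20S, or CacheFlag is missing):
      // a real reader would fall back to plain FSDataInputStream reads here.
      in.close();
      return;
    }
    try {
      // Read up to 256 KB; with verifyChecksums == false the shim requests SKIP_CHECKSUMS.
      ByteBuffer buf = zcr.readBuffer(256 * 1024, false);
      System.out.println("read " + buf.remaining() + " bytes without an extra copy");
      zcr.releaseBuffer(buf);   // hand the buffer back to the stream / fallback pool
    } finally {
      in.close();
    }
  }
}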