diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 6802b4d..4cccb1a 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -500,6 +500,7 @@
HIVEMERGEMAPFILES("hive.merge.mapfiles", true),
HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false),
+ HIVEMERGETEZFILES("hive.merge.tezfiles", false),
HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000)),
HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000)),
HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true),
@@ -536,6 +537,8 @@
HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10),
HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false),
+ HIVE_ORC_ZEROCOPY("hive.exec.orc.zerocopy", false),
+
HIVESKEWJOIN("hive.optimize.skewjoin", false),
HIVECONVERTJOIN("hive.auto.convert.join", true),
HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true),
@@ -562,6 +565,10 @@
HIVEDEBUGLOCALTASK("hive.debug.localtask",false),
HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"),
+ HIVETEZINPUTFORMAT("hive.tez.input.format", "org.apache.hadoop.hive.ql.io.HiveInputFormat"),
+
+ HIVETEZCONTAINERSIZE("hive.tez.container.size", -1),
+ HIVETEZJAVAOPTS("hive.tez.java.opts", null),
HIVEENFORCEBUCKETING("hive.enforce.bucketing", false),
HIVEENFORCESORTING("hive.enforce.sorting", false),
@@ -903,6 +910,9 @@
// Whether to generate the splits locally or in the AM (tez only)
HIVE_AM_SPLIT_GENERATION("hive.compute.splits.in.am", true),
+ HIVE_PREWARM_ENABLED("hive.prewarm.enabled", false),
+ HIVE_PREWARM_NUM_CONTAINERS("hive.prewarm.numcontainers", 10),
+
// none, idonly, traverse, execution
HIVESTAGEIDREARRANGE("hive.stageid.rearrange", "none"),
HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES("hive.explain.dependency.append.tasktype", false),
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index 0b86b9c..3f01e0b 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -794,6 +794,12 @@
+<property>
+  <name>hive.merge.tezfiles</name>
+  <value>false</value>
+  <description>Merge small files at the end of a Tez DAG</description>
+</property>
+
 <property>
   <name>hive.heartbeat.interval</name>
   <value>1000</value>
   <description>Send a heartbeat after this interval - used by mapjoin and filter operators</description>
@@ -960,6 +966,12 @@
+<property>
+  <name>hive.tez.input.format</name>
+  <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
+  <description>The default input format for tez. Tez groups splits in the AM.</description>
+</property>
+
 <property>
   <name>hive.udtf.auto.progress</name>
   <value>false</value>
   <description>Whether Hive should automatically send progress information to TaskTracker when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious because this may prevent TaskTracker from killing tasks with infinite loops.</description>
@@ -2222,6 +2234,22 @@
+<property>
+  <name>hive.prewarm.enabled</name>
+  <value>false</value>
+  <description>
+    Enables container prewarm for tez (hadoop 2 only)
+  </description>
+</property>
+
+<property>
+  <name>hive.prewarm.numcontainers</name>
+  <value>10</value>
+  <description>
+    Controls the number of containers to prewarm for tez (hadoop 2 only)
+  </description>
+</property>
+
 <property>
   <name>hive.server2.table.type.mapping</name>
   <value>CLASSIC</value>
@@ -2340,6 +2368,14 @@
+<property>
+  <name>hive.exec.orc.zerocopy</name>
+  <value>false</value>
+  <description>
+    Use zerocopy reads with ORC.
+  </description>
+</property>
+
 <property>
   <name>hive.jar.directory</name>
   <value>hdfs:///user/hive/</value>
@@ -2358,4 +2394,16 @@
+
+<property>
+  <name>hive.tez.container.size</name>
+  <value>-1</value>
+  <description>By default tez will spawn containers of the size of a mapper. This can be used to override the default.</description>
+</property>
+
+<property>
+  <name>hive.tez.java.opts</name>
+  <value></value>
+  <description>By default tez will use the java opts from map tasks. This can be used to override the default.</description>
+</property>
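
A minimal sketch of driving these new settings programmatically, assuming the patched HiveConf (with HIVEMERGETEZFILES, HIVETEZCONTAINERSIZE and the prewarm ConfVars) is on the classpath and using HiveConf's usual typed setters; the values chosen are only illustrative:

    // Illustrative only: toggling the Tez-related settings added above through
    // HiveConf, mirroring how the index handlers in this patch call setBoolVar.
    import org.apache.hadoop.hive.conf.HiveConf;

    public class TezConfSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // merge small files at the end of a Tez DAG
        conf.setBoolVar(HiveConf.ConfVars.HIVEMERGETEZFILES, true);
        // ask YARN for 1024MB containers instead of falling back to the MR map size
        conf.setIntVar(HiveConf.ConfVars.HIVETEZCONTAINERSIZE, 1024);
        // keep a handful of containers warm between queries
        conf.setBoolVar(HiveConf.ConfVars.HIVE_PREWARM_ENABLED, true);
        conf.setIntVar(HiveConf.ConfVars.HIVE_PREWARM_NUM_CONTAINERS, 5);

        System.out.println(HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE));
      }
    }
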
diff --git data/conf/tez/hive-site.xml data/conf/tez/hive-site.xml
index 1af4495..5ff5b4c 100644
--- data/conf/tez/hive-site.xml
+++ data/conf/tez/hive-site.xml
@@ -33,8 +33,26 @@
-    <name>mapred.child.java.opts</name>
-    <value>-Xmx200m</value>
+    <name>mapred.tez.java.opts</name>
+    <value>-Xmx128m</value>
+  </property>
+
+  <property>
+    <name>hive.tez.container.size</name>
+    <value>128</value>
+  </property>
+
+  <property>
+    <name>hive.merge.tezfiles</name>
+    <value>false</value>
+    <description>Merge small files at the end of a Tez DAG</description>
+  </property>
+
+  <property>
+    <name>hive.tez.input.format</name>
+    <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
+    <description>The default input format for tez. Tez groups splits in the AM.</description>
@@ -172,7 +190,7 @@
     <name>hive.input.format</name>
-    <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
+    <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
     <description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat.</description>
@@ -194,4 +212,20 @@
     <description>Whether to use MR or Tez</description>
   </property>
+
+  <property>
+    <name>hive.prewarm.enabled</name>
+    <value>true</value>
+    <description>
+      Enables container prewarm for tez (hadoop 2 only)
+    </description>
+  </property>
+
+  <property>
+    <name>hive.prewarm.numcontainers</name>
+    <value>3</value>
+    <description>
+      Controls the number of containers to prewarm for tez (hadoop 2 only)
+    </description>
+  </property>
+
diff --git itests/qtest/pom.xml itests/qtest/pom.xml
index e70da0f..cd871ff 100644
--- itests/qtest/pom.xml
+++ itests/qtest/pom.xml
@@ -39,7 +39,7 @@
stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q
cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q
tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q
- join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q
+ join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q
add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
@@ -273,6 +273,12 @@
tests
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-client</artifactId>
+      <version>${hadoop-23.version}</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-common</artifactId>
       <version>${hbase.hadoop2.version}</version>
diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index b192aaf..96868eb 100644
--- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -1264,6 +1264,7 @@ private void maskPatterns(Pattern[] patterns, String fname) throws Exception {
".*job_local[0-9_]*.*",
".*USING 'java -cp.*",
"^Deleted.*",
+ ".*DagName:.*",
".*Input:.*/data/files/.*",
".*Output:.*/data/files/.*"
});
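
The new ".*DagName:.*" entry keeps the Tez DAG name, which embeds the query id and timestamp, out of the golden .q.out files. A small sketch of the masking idea; the replacement token below is assumed for illustration:

    // Illustrative only: how a mask pattern like ".*DagName:.*" removes
    // non-deterministic Tez output before golden-file comparison.
    import java.util.regex.Pattern;

    public class MaskSketch {
      public static void main(String[] args) {
        Pattern dagName = Pattern.compile(".*DagName:.*");
        String line = "Status: Running (DagName: hive_20240101120000_abc)";
        if (dagName.matcher(line).matches()) {
          line = "#### A masked pattern was here ####";  // assumed token
        }
        System.out.println(line);
      }
    }
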
diff --git pom.xml pom.xml
index a09b01d..46b10a0 100644
--- pom.xml
+++ pom.xml
@@ -94,11 +94,11 @@
3.1
1.1.3
10.10.1.1
-    <guava.version>11.0.2</guava.version>
+    <guava.version>15.0</guava.version>
2.1.6
0.20.2
1.2.1
-    <hadoop-23.version>2.2.0</hadoop-23.version>
+    <hadoop-23.version>2.3.0</hadoop-23.version>
0.96.0-hadoop1
0.96.0-hadoop2
@@ -134,7 +134,7 @@
1.0.1
1.7.5
4.0.4
-    <tez.version>0.2.0</tez.version>
+    <tez.version>0.3.0-incubating-SNAPSHOT</tez.version>
1.1
0.2
1.4
@@ -188,6 +188,17 @@
false
+    <repository>
+      <id>apache.snapshots</id>
+      <name>Apache Snapshot Repository</name>
+      <url>http://repository.apache.org/snapshots</url>
+      <releases>
+        <enabled>false</enabled>
+      </releases>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index fe929fc..35f4fa9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -154,17 +154,24 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E
public JSONObject getJSONPlan(PrintStream out, ExplainWork work)
throws Exception {
+ return getJSONPlan(out, work.getAstStringTree(), work.getRootTasks(), work.getFetchTask(),
+ work.isFormatted(), work.getExtended(), work.isAppendTaskType());
+ }
+
+  public JSONObject getJSONPlan(PrintStream out, String ast, List<Task<?>> tasks, Task<?> fetchTask,
+      boolean jsonOutput, boolean isExtended, boolean appendTaskType) throws Exception {
+
// If the user asked for a formatted output, dump the json output
// in the output stream
JSONObject outJSONObject = new JSONObject();
- boolean jsonOutput = work.isFormatted();
+
if (jsonOutput) {
out = null;
}
// Print out the parse AST
- if (work.getAstStringTree() != null && work.getExtended()) {
- String jsonAST = outputAST(work.getAstStringTree(), out, jsonOutput, 0);
+ if (ast != null && isExtended) {
+ String jsonAST = outputAST(ast, out, jsonOutput, 0);
if (out != null) {
out.println();
}
@@ -173,16 +180,15 @@ public JSONObject getJSONPlan(PrintStream out, ExplainWork work)
outJSONObject.put("ABSTRACT SYNTAX TREE", jsonAST);
}
}
-    List<Task<?>> tasks = work.getRootTasks();
     List<Task> ordered = StageIDsRearranger.getExplainOrder(conf, tasks);
-    Task<? extends Serializable> fetchTask = work.getFetchTask();
+
if (fetchTask != null) {
fetchTask.setRootTask(true); // todo HIVE-3925
ordered.add(fetchTask);
}
- JSONObject jsonDependencies = outputDependencies(out, work, ordered);
+ JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
if (out != null) {
out.println();
@@ -193,7 +199,8 @@ public JSONObject getJSONPlan(PrintStream out, ExplainWork work)
}
// Go over all the tasks and dump out the plans
- JSONObject jsonPlan = outputStagePlans(out, work, ordered);
+ JSONObject jsonPlan = outputStagePlans(out, ordered,
+ jsonOutput, isExtended);
if (jsonOutput) {
outJSONObject.put("STAGE PLANS", jsonPlan);
@@ -748,10 +755,10 @@ public String outputAST(String treeString, PrintStream out,
return jsonOutput ? treeString : null;
}
-  public JSONObject outputDependencies(PrintStream out, ExplainWork work, List<Task> tasks)
+  public JSONObject outputDependencies(PrintStream out, boolean jsonOutput,
+      boolean appendTaskType, List<Task> tasks)
throws Exception {
- boolean jsonOutput = work.isFormatted();
- boolean appendTaskType = work.isAppendTaskType();
+
if (out != null) {
out.println("STAGE DEPENDENCIES:");
}
@@ -767,16 +774,17 @@ public JSONObject outputDependencies(PrintStream out, ExplainWork work, List tasks)
+  public JSONObject outputStagePlans(PrintStream out, List<Task> tasks,
+      boolean jsonOutput, boolean isExtended)
throws Exception {
- boolean jsonOutput = work.isFormatted();
+
if (out != null) {
out.println("STAGE PLANS:");
}
JSONObject json = jsonOutput ? new JSONObject() : null;
for (Task task : tasks) {
- outputPlan(task, out, json, work.getExtended(), jsonOutput, 2);
+ outputPlan(task, out, json, isExtended, jsonOutput, 2);
}
return jsonOutput ? json : null;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index cc840be..23ef69b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -777,6 +777,14 @@ public Path read(Kryo kryo, Input input, Class type) {
}
}
+  public static Set<Operator<?>> cloneOperatorTree(Configuration conf, Set<Operator<?>> roots) {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
+    serializePlan(roots, baos, conf, true);
+    Set<Operator<?>> result = deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
+        roots.getClass(), conf, true);
+ return result;
+ }
+
private static void serializePlan(Object plan, OutputStream out, Configuration conf, boolean cloningPlan) {
PerfLogger perfLogger = PerfLogger.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
@@ -3136,8 +3144,10 @@ public static void setInputPaths(JobConf job, List pathsToAdd) {
* Set hive input format, and input format file if necessary.
*/
public static void setInputAttributes(Configuration conf, MapWork mWork) {
+ HiveConf.ConfVars var = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
+ HiveConf.ConfVars.HIVETEZINPUTFORMAT : HiveConf.ConfVars.HIVEINPUTFORMAT;
if (mWork.getInputformat() != null) {
- HiveConf.setVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT, mWork.getInputformat());
+ HiveConf.setVar(conf, var, mWork.getInputformat());
}
if (mWork.getIndexIntermediateFile() != null) {
conf.set("hive.index.compact.file", mWork.getIndexIntermediateFile());
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 642841f..1fbc57d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -17,18 +17,27 @@
*/
package org.apache.hadoop.hive.ql.exec.tez;
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
+
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import javax.security.auth.login.LoginException;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -41,6 +50,7 @@
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
import org.apache.hadoop.hive.ql.exec.mr.ExecReducer;
+import org.apache.hadoop.hive.ql.exec.tez.tools.TezMergedLogicalInput;
import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -64,18 +74,26 @@
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
+import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
+import org.apache.tez.dag.api.GroupInputEdge;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.VertexLocationHint;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.client.PreWarmContext;
+import org.apache.tez.client.TezSessionConfiguration;
+import org.apache.tez.dag.api.VertexGroup;
import org.apache.tez.mapreduce.common.MRInputAMSplitGenerator;
import org.apache.tez.mapreduce.hadoop.InputSplitInfo;
import org.apache.tez.mapreduce.hadoop.MRHelpers;
@@ -84,6 +102,8 @@
import org.apache.tez.mapreduce.input.MRInputLegacy;
import org.apache.tez.mapreduce.output.MROutput;
import org.apache.tez.mapreduce.partition.MRPartitioner;
+import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput;
+import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValuesInput;
import org.apache.tez.runtime.library.input.ShuffledMergedInputLegacy;
import org.apache.tez.runtime.library.input.ShuffledUnorderedKVInput;
import org.apache.tez.runtime.library.output.OnFileSortedOutput;
@@ -96,9 +116,36 @@
*/
public class DagUtils {
+ private static final Log LOG = LogFactory.getLog(DagUtils.class.getName());
private static final String TEZ_DIR = "_tez_scratch_dir";
private static DagUtils instance;
+ private void addCredentials(MapWork mapWork, DAG dag) {
+    Set<String> paths = mapWork.getPathToAliases().keySet();
+    if (paths != null && !paths.isEmpty()) {
+      Iterator<URI> pathIterator = Iterators.transform(paths.iterator(), new Function<String, URI>() {
+ @Override
+ public URI apply(String input) {
+ return new Path(input).toUri();
+ }
+ });
+
+      Set<URI> uris = new HashSet<URI>();
+ Iterators.addAll(uris, pathIterator);
+
+ if (LOG.isDebugEnabled()) {
+ for (URI uri: uris) {
+ LOG.debug("Marking URI as needing credentials: "+uri);
+ }
+ }
+ dag.addURIsForCredentials(uris);
+ }
+ }
+
+ private void addCredentials(ReduceWork reduceWork, DAG dag) {
+ // nothing at the moment
+ }
+
/*
* Creates the configuration object necessary to run a specific vertex from
* map work. This includes input formats, input processor, etc.
@@ -132,7 +179,7 @@ private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) {
Utilities.setInputAttributes(conf, mapWork);
- String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
+ String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT);
if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) {
inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName();
}
@@ -148,9 +195,56 @@ private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) {
}
/**
+ * Given a VertexGroup and a vertex, createEdge will create a
+ * GroupInputEdge between them.
+ *
+ * @param group The parent VertexGroup
+ * @param wConf The job conf of the child vertex
+ * @param w The child vertex
+ * @param edgeType the type of connection between the two
+ * endpoints.
+ */
+ public GroupInputEdge createEdge(VertexGroup group, JobConf wConf,
+ Vertex w, EdgeType edgeType)
+ throws IOException {
+
+ Class mergeInputClass;
+
+ LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getVertexName());
+ w.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(wConf));
+
+ switch (edgeType) {
+ case BROADCAST_EDGE:
+ mergeInputClass = ConcatenatedMergedKeyValueInput.class;
+ break;
+
+ case SIMPLE_EDGE:
+ default:
+ mergeInputClass = TezMergedLogicalInput.class;
+ break;
+ }
+
+ return new GroupInputEdge(group, w, createEdgeProperty(edgeType),
+ new InputDescriptor(mergeInputClass.getName()));
+ }
+
+ /**
+ * Given two vertices a, b update their configurations to be used in an Edge a-b
+ */
+ public void updateConfigurationForEdge(JobConf vConf, Vertex v, JobConf wConf, Vertex w)
+ throws IOException {
+
+ // Tez needs to setup output subsequent input pairs correctly
+ MultiStageMRConfToTezTranslator.translateVertexConfToTez(wConf, vConf);
+
+ // update payloads (configuration for the vertices might have changed)
+ v.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(vConf));
+ w.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(wConf));
+ }
+
+ /**
* Given two vertices and their respective configuration objects createEdge
- * will create an Edge object that connects the two. Currently the edge will
- * always be a stable bi-partite edge.
+ * will create an Edge object that connects the two.
*
* @param vConf JobConf of the first vertex
* @param v The first vertex (source)
@@ -162,13 +256,15 @@ public Edge createEdge(JobConf vConf, Vertex v, JobConf wConf, Vertex w,
EdgeType edgeType)
throws IOException {
- // Tez needs to setup output subsequent input pairs correctly
- MultiStageMRConfToTezTranslator.translateVertexConfToTez(wConf, vConf);
+ updateConfigurationForEdge(vConf, v, wConf, w);
- // update payloads (configuration for the vertices might have changed)
- v.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(vConf));
- w.getProcessorDescriptor().setUserPayload(MRHelpers.createUserPayloadFromConf(wConf));
+ return new Edge(v, w, createEdgeProperty(edgeType));
+ }
+ /*
+ * Helper function to create an edge property from an edge type.
+ */
+ private EdgeProperty createEdgeProperty(EdgeType edgeType) {
DataMovementType dataMovementType;
Class logicalInputClass;
Class logicalOutputClass;
@@ -194,10 +290,40 @@ public Edge createEdge(JobConf vConf, Vertex v, JobConf wConf, Vertex w,
SchedulingType.SEQUENTIAL,
new OutputDescriptor(logicalOutputClass.getName()),
new InputDescriptor(logicalInputClass.getName()));
- return new Edge(v, w, edgeProperty);
+
+ return edgeProperty;
}
/*
+ * Helper to determine the size of the container requested
+ * from yarn. Falls back to Map-reduce's map size if tez
+ * container size isn't set.
+ */
+ private Resource getContainerResource(Configuration conf) {
+ Resource containerResource;
+ int memory = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) > 0 ?
+ HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) :
+ conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB);
+ int cpus = conf.getInt(MRJobConfig.MAP_CPU_VCORES,
+ MRJobConfig.DEFAULT_MAP_CPU_VCORES);
+ return Resource.newInstance(memory, cpus);
+ }
+
+ /*
+ * Helper to determine what Java options to use for the containers.
+ * Falls back to MapReduce's map Java opts if no Tez-specific options
+ * are set.
+ */
+ private String getContainerJavaOpts(Configuration conf) {
+ String javaOpts = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZJAVAOPTS);
+ if (javaOpts != null && !javaOpts.isEmpty()) {
+ return javaOpts;
+ }
+ return MRHelpers.getMapJavaOpts(conf);
+ }
+
+
+ /*
* Helper function to create Vertex from MapWork.
*/
private Vertex createVertex(JobConf conf, MapWork mapWork,
@@ -248,12 +374,11 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf);
map = new Vertex(mapWork.getName(),
new ProcessorDescriptor(MapTezProcessor.class.getName()).
- setUserPayload(serializedConf), numTasks,
- MRHelpers.getMapResource(conf));
+ setUserPayload(serializedConf), numTasks, getContainerResource(conf));
Map environment = new HashMap();
MRHelpers.updateEnvironmentForMRTasks(conf, environment, true);
map.setTaskEnvironment(environment);
- map.setJavaOpts(MRHelpers.getMapJavaOpts(conf));
+ map.setJavaOpts(getContainerJavaOpts(conf));
assert mapWork.getAliasToWork().keySet().size() == 1;
@@ -262,7 +387,7 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
byte[] mrInput = null;
if (useTezGroupedSplits) {
mrInput = MRHelpers.createMRInputPayloadWithGrouping(serializedConf,
- null, HiveInputFormat.class.getName());
+ HiveInputFormat.class.getName());
} else {
mrInput = MRHelpers.createMRInputPayload(serializedConf, null);
}
@@ -323,14 +448,14 @@ private Vertex createVertex(JobConf conf, ReduceWork reduceWork,
Vertex reducer = new Vertex(reduceWork.getName(),
new ProcessorDescriptor(ReduceTezProcessor.class.getName()).
setUserPayload(MRHelpers.createUserPayloadFromConf(conf)),
- reduceWork.getNumReduceTasks(), MRHelpers.getReduceResource(conf));
+ reduceWork.getNumReduceTasks(), getContainerResource(conf));
Map environment = new HashMap();
MRHelpers.updateEnvironmentForMRTasks(conf, environment, false);
reducer.setTaskEnvironment(environment);
- reducer.setJavaOpts(MRHelpers.getReduceJavaOpts(conf));
+ reducer.setJavaOpts(getContainerJavaOpts(conf));
Map localResources = new HashMap();
localResources.put(getBaseName(appJarLr), appJarLr);
@@ -370,6 +495,49 @@ private LocalResource createLocalResource(FileSystem remoteFs, Path file,
}
/**
+ * @param sessionConfig session configuration
+ * @param numContainers number of containers to pre-warm
+ * @param localResources additional resources to pre-warm with
+ * @return prewarm context object
+ */
+  public PreWarmContext createPreWarmContext(TezSessionConfiguration sessionConfig, int numContainers,
+      Map<String, LocalResource> localResources) throws IOException, TezException {
+
+ Configuration conf = sessionConfig.getTezConfiguration();
+
+ ProcessorDescriptor prewarmProcDescriptor = new ProcessorDescriptor(HivePreWarmProcessor.class.getName());
+ prewarmProcDescriptor.setUserPayload(MRHelpers.createUserPayloadFromConf(conf));
+
+ PreWarmContext context = new PreWarmContext(prewarmProcDescriptor, getContainerResource(conf),
+ numContainers, new VertexLocationHint(null));
+
+    Map<String, LocalResource> combinedResources = new HashMap<String, LocalResource>();
+
+ combinedResources.putAll(sessionConfig.getSessionResources());
+
+ try {
+ for(LocalResource lr : localizeTempFiles(conf)) {
+ combinedResources.put(getBaseName(lr), lr);
+ }
+ } catch(LoginException le) {
+ throw new IOException(le);
+ }
+
+ if(localResources != null) {
+ combinedResources.putAll(localResources);
+ }
+
+ context.setLocalResources(combinedResources);
+
+ /* boiler plate task env */
+    Map<String, String> environment = new HashMap<String, String>();
+ MRHelpers.updateEnvironmentForMRTasks(conf, environment, true);
+ context.setEnvironment(environment);
+ context.setJavaOpts(getContainerJavaOpts(conf));
+ return context;
+ }
+
+ /**
* @param conf
* @return path to destination directory on hdfs
* @throws LoginException if we are unable to figure user information
@@ -651,6 +819,17 @@ public Vertex createVertex(JobConf conf, BaseWork work,
}
/**
+ * Set up credentials for the base work on secure clusters
+ */
+ public void addCredentials(BaseWork work, DAG dag) {
+ if (work instanceof MapWork) {
+ addCredentials((MapWork) work, dag);
+ } else if (work instanceof ReduceWork) {
+ addCredentials((ReduceWork) work, dag);
+ }
+ }
+
+ /**
* createTezDir creates a temporary directory in the scratchDir folder to
* be used with Tez. Assumes scratchDir exists.
*/
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java
new file mode 100644
index 0000000..c756e72
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HivePreWarmProcessor.java
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.tez;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.ReadaheadPool;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.tez.common.TezUtils;
+import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.LogicalIOProcessor;
+import org.apache.tez.runtime.api.LogicalInput;
+import org.apache.tez.runtime.api.LogicalOutput;
+import org.apache.tez.runtime.api.TezProcessorContext;
+
+import java.net.URL;
+import java.net.JarURLConnection;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.Map;
+import java.util.jar.JarFile;
+import java.util.jar.JarEntry;
+
+import javax.crypto.Mac;
+
+/**
+ * A processor used to prewarm Tez containers before real work is
+ * scheduled into them. It triggers the one-time initialization most
+ * queries pay for on their first task (filesystem, shims, readahead
+ * pool, JCE lookup) and preloads common operator and IO classes from
+ * hive-exec.jar.
+ */
+public class HivePreWarmProcessor implements LogicalIOProcessor {
+
+ private static boolean prewarmed = false;
+
+ private static final Log LOG = LogFactory.getLog(HivePreWarmProcessor.class);
+
+ private Configuration conf;
+
+ @Override
+ public void initialize(TezProcessorContext processorContext)
+ throws Exception {
+ byte[] userPayload = processorContext.getUserPayload();
+ this.conf = TezUtils.createConfFromUserPayload(userPayload);
+ }
+
+ @Override
+  public void run(Map<String, LogicalInput> inputs,
+      Map<String, LogicalOutput> outputs) throws Exception {
+ if(prewarmed) {
+ /* container reuse */
+ return;
+ }
+ for (LogicalInput input : inputs.values()) {
+ input.start();
+ }
+ for (LogicalOutput output : outputs.values()) {
+ output.start();
+ }
+    /* these are things that go through singleton initialization on most queries */
+ FileSystem fs = FileSystem.get(conf);
+ Mac mac = Mac.getInstance("HmacSHA1");
+ ReadaheadPool rpool = ReadaheadPool.getInstance();
+ ShimLoader.getHadoopShims();
+
+ URL hiveurl = new URL("jar:"+DagUtils.getInstance().getExecJarPathLocal()+"!/");
+ JarURLConnection hiveconn = (JarURLConnection)hiveurl.openConnection();
+ JarFile hivejar = hiveconn.getJarFile();
+ try {
+      Enumeration<JarEntry> classes = hivejar.entries();
+ while(classes.hasMoreElements()) {
+ JarEntry je = classes.nextElement();
+ if (je.getName().endsWith(".class")) {
+ String klass = je.getName().replace(".class","").replaceAll("/","\\.");
+ if(klass.indexOf("ql.exec") != -1 || klass.indexOf("ql.io") != -1) {
+ /* several hive classes depend on the metastore APIs, which are not included
+ * in hive-exec.jar. These are the relatively safe ones - operators & io classes.
+ */
+ if(klass.indexOf("vector") != -1 || klass.indexOf("Operator") != -1) {
+ Class.forName(klass);
+ }
+ }
+ }
+ }
+ } finally {
+ hivejar.close();
+ }
+ prewarmed = true;
+ }
+
+ @Override
+  public void handleEvents(List<Event> processorEvents) {
+ // Nothing to do
+ }
+
+ @Override
+ public void close() throws Exception {
+ // Nothing to cleanup
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
index 7c2c2a6..d89f2c7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
@@ -55,7 +55,6 @@
import org.apache.tez.mapreduce.processor.MRTaskReporter;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.library.api.KeyValuesReader;
-import org.apache.tez.runtime.library.input.ShuffledMergedInput;
/**
* Process input from tez LogicalInput and write output - for a map plan
@@ -184,15 +183,19 @@ void init(JobConf jconf, MRTaskReporter mrReporter, Map in
@Override
void run() throws IOException{
-    List<ShuffledMergedInput> shuffleInputs = getShuffleInputs(inputs);
+    List<LogicalInput> shuffleInputs = getShuffleInputs(inputs);
KeyValuesReader kvsReader;
- if(shuffleInputs.size() == 1){
- //no merging of inputs required
- kvsReader = shuffleInputs.get(0).getReader();
- }else {
- //get a sort merged input
- kvsReader = new InputMerger(shuffleInputs);
+ try {
+ if(shuffleInputs.size() == 1){
+ //no merging of inputs required
+ kvsReader = (KeyValuesReader) shuffleInputs.get(0).getReader();
+ }else {
+ //get a sort merged input
+ kvsReader = new InputMerger(shuffleInputs);
+ }
+ } catch (Exception e) {
+ throw new IOException(e);
}
while(kvsReader.next()){
@@ -211,12 +214,12 @@ void run() throws IOException{
* @param inputs
* @return
*/
-  private List<ShuffledMergedInput> getShuffleInputs(Map<String, LogicalInput> inputs) {
+  private List<LogicalInput> getShuffleInputs(Map<String, LogicalInput> inputs) {
//the reduce plan inputs have tags, add all inputs that have tags
     Map<Integer, String> tag2input = redWork.getTagToInput();
-    ArrayList<ShuffledMergedInput> shuffleInputs = new ArrayList<ShuffledMergedInput>();
+    ArrayList<LogicalInput> shuffleInputs = new ArrayList<LogicalInput>();
for(String inpStr : tag2input.values()){
- shuffleInputs.add((ShuffledMergedInput)inputs.get(inpStr));
+ shuffleInputs.add((LogicalInput)inputs.get(inpStr));
}
return shuffleInputs;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java
index 9c3284b..9be2aa2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java
@@ -124,6 +124,12 @@ public void run(Map inputs, Map out
// (possibly asynchronously)
LOG.info("Running map: " + processorContext.getUniqueIdentifier());
+ for (LogicalInput input : inputs.values()) {
+ input.start();
+ }
+ for (LogicalOutput output : outputs.values()) {
+ output.start();
+ }
Map outMap = new HashMap();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index b8552a3..aef6e68 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.tez.client.AMConfiguration;
@@ -43,7 +44,7 @@
import org.apache.tez.dag.api.SessionNotRunning;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
-import org.apache.tez.mapreduce.hadoop.MRHelpers;
+import org.apache.tez.client.PreWarmContext;
/**
* Holds session state related to Tez
@@ -134,8 +135,24 @@ public void open(String sessionId, HiveConf conf)
session = new TezSession("HIVE-"+sessionId, sessionConfig);
LOG.info("Opening new Tez Session (id: "+sessionId+", scratch dir: "+tezScratchDir+")");
+
session.start();
+ if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
+ int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
+ LOG.info("Prewarming " + n + " containers (id: " + sessionId
+ + ", scratch dir: " + tezScratchDir + ")");
+ PreWarmContext context = utils.createPreWarmContext(sessionConfig, n,
+ commonLocalResources);
+ try {
+ session.preWarm(context);
+ } catch (InterruptedException ie) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Hive Prewarm threw an exception ", ie);
+ }
+ }
+ }
+
// In case we need to run some MR jobs, we'll run them under tez MR emulation. The session
// id is used for tez to reuse the current session rather than start a new one.
conf.set("mapreduce.framework.name", "yarn-tez");
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index bef5ba3..d30ec8e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -23,6 +23,7 @@
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -42,6 +43,7 @@
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.UnionWork;
import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
@@ -54,9 +56,11 @@
import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
+import org.apache.tez.dag.api.GroupInputEdge;
import org.apache.tez.dag.api.SessionNotRunning;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.VertexGroup;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.StatusGetOpts;
@@ -97,9 +101,6 @@ public int execute(DriverContext driverContext) {
DAGClient client = null;
TezSessionState session = null;
- // Tez requires us to use RPC for the query plan
- HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
-
try {
// Get or create Context object. If we create it we have to clean
// it later as well.
@@ -206,9 +207,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir,
FileSystem fs = tezDir.getFileSystem(conf);
// the name of the dag is what is displayed in the AM/Job UI
- DAG dag = new DAG(
- Utilities.abbreviate(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING),
- HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEJOBNAMELENGTH)));
+ DAG dag = new DAG(work.getName());
for (BaseWork w: ws) {
@@ -216,23 +215,68 @@ DAG build(JobConf conf, TezWork work, Path scratchDir,
// translate work to vertex
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
- JobConf wxConf = utils.initializeVertexConf(conf, w);
- Vertex wx = utils.createVertex(wxConf, w, tezDir,
- appJarLr, additionalLr, fs, ctx, !isFinal);
- dag.addVertex(wx);
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
- workToVertex.put(w, wx);
- workToConf.put(w, wxConf);
-
- // add all dependencies (i.e.: edges) to the graph
- for (BaseWork v: work.getChildren(w)) {
- assert workToVertex.containsKey(v);
- Edge e = null;
-
- EdgeType edgeType = work.getEdgeProperty(w, v);
-
- e = utils.createEdge(wxConf, wx, workToConf.get(v), workToVertex.get(v), edgeType);
- dag.addEdge(e);
+
+ if (w instanceof UnionWork) {
+ // Special case for unions. These items translate to VertexGroups
+
+        List<BaseWork> unionWorkItems = new LinkedList<BaseWork>();
+        List<BaseWork> children = new LinkedList<BaseWork>();
+
+ // split the children into vertices that make up the union and vertices that are
+ // proper children of the union
+ for (BaseWork v: work.getChildren(w)) {
+ EdgeType type = work.getEdgeProperty(w, v);
+ if (type == EdgeType.CONTAINS) {
+ unionWorkItems.add(v);
+ } else {
+ children.add(v);
+ }
+ }
+
+ // create VertexGroup
+ Vertex[] vertexArray = new Vertex[unionWorkItems.size()];
+
+ int i = 0;
+ for (BaseWork v: unionWorkItems) {
+ vertexArray[i++] = workToVertex.get(v);
+ }
+ VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray);
+
+ // now hook up the children
+ for (BaseWork v: children) {
+ // need to pairwise patch up the configuration of the vertices
+ for (BaseWork part: unionWorkItems) {
+ utils.updateConfigurationForEdge(workToConf.get(part), workToVertex.get(part),
+ workToConf.get(v), workToVertex.get(v));
+ }
+
+ // finally we can create the grouped edge
+ GroupInputEdge e = utils.createEdge(group, workToConf.get(v),
+ workToVertex.get(v), work.getEdgeProperty(w, v));
+
+ dag.addEdge(e);
+ }
+ } else {
+ // Regular vertices
+ JobConf wxConf = utils.initializeVertexConf(conf, w);
+ Vertex wx = utils.createVertex(wxConf, w, tezDir, appJarLr,
+ additionalLr, fs, ctx, !isFinal);
+ dag.addVertex(wx);
+ utils.addCredentials(w, dag);
+ perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
+ workToVertex.put(w, wx);
+ workToConf.put(w, wxConf);
+
+ // add all dependencies (i.e.: edges) to the graph
+ for (BaseWork v: work.getChildren(w)) {
+ assert workToVertex.containsKey(v);
+ Edge e = null;
+
+ EdgeType edgeType = work.getEdgeProperty(w, v);
+
+ e = utils.createEdge(wxConf, wx, workToConf.get(v), workToVertex.get(v), edgeType);
+ dag.addEdge(e);
+ }
}
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java
index e5746c4..726e122 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/InputMerger.java
@@ -26,12 +26,13 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor;
import org.apache.hadoop.io.BinaryComparable;
+import org.apache.tez.runtime.api.Input;
+import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.library.api.KeyValuesReader;
-import org.apache.tez.runtime.library.input.ShuffledMergedInput;
/**
* A KeyValuesReader implementation that returns a sorted stream of key-values
- * by doing a sorted merge of the key-value in ShuffledMergedInputs.
+ * by doing a sorted merge of the key-values in LogicalInputs.
* Tags are in the last byte of the key, so no special handling for tags is required.
* Uses a priority queue to pick the KeyValuesReader of the input that is next in
* sort order.
@@ -42,12 +43,12 @@
   private PriorityQueue<KeyValuesReader> pQueue = null;
private KeyValuesReader nextKVReader = null;
-  public InputMerger(List<ShuffledMergedInput> shuffleInputs) throws IOException {
- //get KeyValuesReaders from the ShuffledMergedInput and add them to priority queue
+  public InputMerger(List<? extends Input> shuffleInputs) throws Exception {
+ //get KeyValuesReaders from the LogicalInput and add them to priority queue
int initialCapacity = shuffleInputs.size();
     pQueue = new PriorityQueue<KeyValuesReader>(initialCapacity, new KVReaderComparator());
- for(ShuffledMergedInput input : shuffleInputs){
- addToQueue(input.getReader());
+ for(Input input : shuffleInputs){
+ addToQueue((KeyValuesReader)input.getReader());
}
}
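
InputMerger performs a k-way sorted merge over the readers of the shuffle inputs, driven by the priority queue. A self-contained analogue of that loop, with iterators over sorted integers standing in for the KeyValuesReaders:

    // Illustrative only: the k-way merge pattern InputMerger uses, with plain
    // iterators in place of Tez KeyValuesReaders.
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.Iterator;
    import java.util.List;
    import java.util.PriorityQueue;

    public class KWayMergeSketch {
      public static void main(String[] args) {
        List<Iterator<Integer>> inputs = new ArrayList<Iterator<Integer>>();
        inputs.add(Arrays.asList(1, 4, 7).iterator());
        inputs.add(Arrays.asList(2, 5, 8).iterator());
        inputs.add(Arrays.asList(3, 6, 9).iterator());

        // order the inputs by the key each one is currently positioned on
        PriorityQueue<int[]> queue = new PriorityQueue<int[]>(inputs.size(),
            new Comparator<int[]>() {
              @Override
              public int compare(int[] a, int[] b) {
                return Integer.compare(a[0], b[0]);
              }
            });
        for (int i = 0; i < inputs.size(); i++) {
          queue.add(new int[] { inputs.get(i).next(), i });
        }

        // repeatedly emit the smallest head, then re-add that input's next key
        StringBuilder merged = new StringBuilder();
        while (!queue.isEmpty()) {
          int[] head = queue.poll();
          merged.append(head[0]).append(' ');
          Iterator<Integer> it = inputs.get(head[1]);
          if (it.hasNext()) {
            queue.add(new int[] { it.next(), head[1] });
          }
        }
        System.out.println(merged.toString().trim());   // 1 2 3 4 5 6 7 8 9
      }
    }
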
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/TezMergedLogicalInput.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/TezMergedLogicalInput.java
new file mode 100644
index 0000000..c4b99e5
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/TezMergedLogicalInput.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.tez.tools;
+
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tez.runtime.api.Input;
+import org.apache.tez.runtime.api.LogicalInput;
+import org.apache.tez.runtime.api.MergedLogicalInput;
+import org.apache.tez.runtime.api.Reader;
+
+/**
+ * TezMergedLogicalInput is an adapter to make union input look like
+ * a single input in tez.
+ */
+public class TezMergedLogicalInput extends MergedLogicalInput {
+
+ @Override
+ public Reader getReader() throws Exception {
+ return new InputMerger(getInputs());
+ }
+
+ @Override
+ public void setConstituentInputIsReady(Input input) {
+ // ignore notification
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java
index 427ea12..c39e823 100644
--- ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java
@@ -152,6 +152,7 @@ private void createAggregationFunction(List indexTblCols, String pr
HiveConf builderConf = new HiveConf(getConf(), AggregateIndexHandler.class);
builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
+ builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGETEZFILES, false);
     Task<?> rootTask = IndexUtils.createRootTask(builderConf, inputs, outputs,
         command, (LinkedHashMap<String, String>) partSpec, indexTableName, dbName);
diff --git ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
index 11ddcae..0135a71 100644
--- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
@@ -144,6 +144,7 @@ public void analyzeIndexDefinition(Table baseTable, Index index,
HiveConf builderConf = new HiveConf(getConf(), CompactIndexHandler.class);
builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
+ builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGETEZFILES, false);
     Task<?> rootTask = IndexUtils.createRootTask(builderConf, inputs, outputs,
         command, partSpec, indexTableName, dbName);
return rootTask;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
new file mode 100644
index 0000000..41a77b0
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public interface DirectDecompressionCodec extends CompressionCodec {
+ public boolean isAvailable();
+ public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
index 6da3d03..74ba971 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
@@ -21,8 +21,13 @@
import java.io.InputStream;
import java.nio.ByteBuffer;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
abstract class InStream extends InputStream {
+ private static final Log LOG = LogFactory.getLog(InStream.class);
+
private static class UncompressedStream extends InStream {
private final String name;
private final ByteBuffer[] bytes;
@@ -172,7 +177,7 @@ private void readHeader() throws IOException {
bufferSize + " needed = " + chunkLength);
}
// read 3 bytes, which should be equal to OutStream.HEADER_SIZE always
- assert OutStream.HEADER_SIZE == 3 : "The Orc HEADER_SIZE must be the same in OutStream and InStream";
+ assert OutStream.HEADER_SIZE == 3 : "The Orc HEADER_SIZE must be the same in OutStream and InStream";
currentOffset += OutStream.HEADER_SIZE;
ByteBuffer slice = this.slice(chunkLength);
@@ -274,14 +279,23 @@ private ByteBuffer slice(int chunkLength) throws IOException {
chunkLength + " bytes");
}
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format(
+ "Crossing into next BufferChunk because compressed only has %d bytes (needs %d)",
+ compressed.remaining(), len));
+ }
+
// we need to consolidate 2 or more buffers into 1
- // first clear out compressed buffers
+ // first copy out compressed buffers
ByteBuffer copy = allocateBuffer(chunkLength);
currentOffset += compressed.remaining();
len -= compressed.remaining();
copy.put(compressed);
while (len > 0 && (++currentRange) < bytes.length) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("Read slow-path, >1 cross block reads with %s", this.toString()));
+ }
compressed = bytes[currentRange].duplicate();
if (compressed.remaining() >= len) {
slice = compressed.slice();
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java
index 9af12de..ac56702 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java
@@ -122,6 +122,9 @@ synchronized void removeWriter(Path path) throws IOException {
totalAllocation -= val.allocation;
updateScale(false);
}
+ if(writerList.isEmpty()) {
+ rowsAddedSinceCheck = 0;
+ }
}
/**
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 7798a7c..ef68d1a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -28,7 +28,9 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.TreeMap;
+import org.apache.commons.lang.builder.HashCodeBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -36,6 +38,8 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.conf.HiveConf;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_ZEROCOPY;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
@@ -53,6 +57,10 @@
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.hive.shims.HadoopShims.*;
+
+import com.google.common.collect.ComparisonChain;
class RecordReaderImpl implements RecordReader {
@@ -87,6 +95,89 @@
private boolean[] includedRowGroups = null;
private final Configuration conf;
+ private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool();
+ private final ZeroCopyReaderShim zcr;
+
+ // this is an implementation copied from ElasticByteBufferPool in hadoop-2,
+ // which lacks a clear()/clean() operation
+ public final static class ByteBufferAllocatorPool implements ByteBufferPoolShim {
+    private static final class Key implements Comparable<Key> {
+ private final int capacity;
+ private final long insertionGeneration;
+
+ Key(int capacity, long insertionGeneration) {
+ this.capacity = capacity;
+ this.insertionGeneration = insertionGeneration;
+ }
+
+ @Override
+ public int compareTo(Key other) {
+ return ComparisonChain.start().compare(capacity, other.capacity)
+ .compare(insertionGeneration, other.insertionGeneration).result();
+ }
+
+ @Override
+ public boolean equals(Object rhs) {
+ if (rhs == null) {
+ return false;
+ }
+ try {
+ Key o = (Key) rhs;
+ return (compareTo(o) == 0);
+ } catch (ClassCastException e) {
+ return false;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return new HashCodeBuilder().append(capacity).append(insertionGeneration)
+ .toHashCode();
+ }
+ }
+
+    private final TreeMap<Key, ByteBuffer> buffers = new TreeMap<Key, ByteBuffer>();
+
+    private final TreeMap<Key, ByteBuffer> directBuffers = new TreeMap<Key, ByteBuffer>();
+
+ private long currentGeneration = 0;
+
+    private final TreeMap<Key, ByteBuffer> getBufferTree(boolean direct) {
+ return direct ? directBuffers : buffers;
+ }
+
+ public void clear() {
+ buffers.clear();
+ directBuffers.clear();
+ }
+
+ @Override
+ public ByteBuffer getBuffer(boolean direct, int length) {
+      TreeMap<Key, ByteBuffer> tree = getBufferTree(direct);
+      Map.Entry<Key, ByteBuffer> entry = tree.ceilingEntry(new Key(length, 0));
+ if (entry == null) {
+ return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer
+ .allocate(length);
+ }
+ tree.remove(entry.getKey());
+ return entry.getValue();
+ }
+
+ @Override
+ public void putBuffer(ByteBuffer buffer) {
+      TreeMap<Key, ByteBuffer> tree = getBufferTree(buffer.isDirect());
+ while (true) {
+ Key key = new Key(buffer.capacity(), currentGeneration++);
+ if (!tree.containsKey(key)) {
+ tree.put(key, buffer);
+ return;
+ }
+ // Buffers are indexed by (capacity, generation).
+ // If our key is not unique on the first try, we try again
+ }
+ }
+ }
+
RecordReaderImpl(Iterable stripes,
FileSystem fileSystem,
Path path,
@@ -130,6 +221,18 @@
}
}
+ final boolean zeroCopy = (conf != null)
+ && (HiveConf.getBoolVar(conf, HIVE_ORC_ZEROCOPY));
+
+ if (zeroCopy
+ && (codec == null || ((codec instanceof DirectDecompressionCodec)
+ && ((DirectDecompressionCodec) codec).isAvailable()))) {
+ /* codec is null or is available */
+ this.zcr = ShimLoader.getHadoopShims().getZeroCopyReader(file, pool);
+ } else {
+ this.zcr = null;
+ }
+
firstRow = skippedRows;
totalRowCount = rows;
reader = createTreeReader(path, 0, types, included, conf);
@@ -2283,6 +2386,11 @@ private void clearStreams() throws IOException {
is.close();
}
if(bufferChunks != null) {
+ if(zcr != null) {
+ for (BufferChunk bufChunk : bufferChunks) {
+ zcr.releaseBuffer(bufChunk.chunk);
+ }
+ }
bufferChunks.clear();
}
streams.clear();
@@ -2599,10 +2707,20 @@ static void mergeDiskRanges(List<DiskRange> ranges) {
for(DiskRange range: ranges) {
int len = (int) (range.end - range.offset);
long off = range.offset;
- file.seek(base + off);
- byte[] buffer = new byte[len];
- file.readFully(buffer, 0, buffer.length);
- result.add(new BufferChunk(ByteBuffer.wrap(buffer), range.offset));
+ file.seek(base + off);
+ if(zcr != null) {
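+ // a single zero-copy read may map fewer bytes than requested,
+ // so keep reading until the whole disk range is covered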
+ while(len > 0) {
+ ByteBuffer partial = zcr.readBuffer(len, false);
+ result.add(new BufferChunk(partial, off));
+ int read = partial.remaining();
+ len -= read;
+ off += read;
+ }
+ } else {
+ byte[] buffer = new byte[len];
+ file.readFully(buffer, 0, buffer.length);
+ result.add(new BufferChunk(ByteBuffer.wrap(buffer), range.offset));
+ }
}
return result;
}
@@ -2840,6 +2958,7 @@ public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOExcept
@Override
public void close() throws IOException {
clearStreams();
+ pool.clear();
file.close();
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
index e3131a3..4613015 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
@@ -18,12 +18,17 @@
package org.apache.hadoop.hive.ql.io.orc;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
import org.iq80.snappy.Snappy;
import java.io.IOException;
import java.nio.ByteBuffer;
-class SnappyCodec implements CompressionCodec {
+class SnappyCodec implements CompressionCodec, DirectDecompressionCodec {
+
+ Boolean direct = null;
@Override
public boolean compress(ByteBuffer in, ByteBuffer out,
@@ -57,6 +62,10 @@ public boolean compress(ByteBuffer in, ByteBuffer out,
@Override
public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
+ if(in.isDirect() && out.isDirect()) {
+ directDecompress(in, out);
+ return;
+ }
int inOffset = in.position();
int uncompressLen =
Snappy.uncompress(in.array(), in.arrayOffset() + inOffset,
@@ -64,4 +73,30 @@ public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
out.position(uncompressLen + out.position());
out.flip();
}
+
+ @Override
+ public boolean isAvailable() {
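+ // probe the shim once for a native direct decompressor and cache the result;
+ // an UnsatisfiedLinkError simply means the native library is not available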
+ if (direct == null) {
+ try {
+ if (ShimLoader.getHadoopShims().getDirectDecompressor(
+ DirectCompressionType.SNAPPY) != null) {
+ direct = Boolean.valueOf(true);
+ } else {
+ direct = Boolean.valueOf(false);
+ }
+ } catch (UnsatisfiedLinkError ule) {
+ direct = Boolean.valueOf(false);
+ }
+ }
+ return direct.booleanValue();
+ }
+
+ @Override
+ public void directDecompress(ByteBuffer in, ByteBuffer out)
+ throws IOException {
+ DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims()
+ .getDirectDecompressor(DirectCompressionType.SNAPPY);
+ decompressShim.decompress(in, out);
+ out.flip(); // flip for read
+ }
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index 27a9338..660d8e3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -67,6 +67,7 @@
this.offset = fileSplit.getStart();
this.length = fileSplit.getLength();
this.reader = file.rows(offset, length, includedColumns, sarg, columnNames);
+
try {
rbCtx = new VectorizedRowBatchCtx();
rbCtx.init(conf, fileSplit);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
index a75fdea..27fbb42 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
@@ -23,7 +23,14 @@
import java.util.zip.Deflater;
import java.util.zip.Inflater;
-class ZlibCodec implements CompressionCodec {
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
+import org.apache.hadoop.hive.shims.ShimLoader;
+
+class ZlibCodec implements CompressionCodec, DirectDecompressionCodec {
+
+ private Boolean direct = null;
@Override
public boolean compress(ByteBuffer in, ByteBuffer out,
@@ -55,6 +62,12 @@ public boolean compress(ByteBuffer in, ByteBuffer out,
@Override
public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
+
+ if(in.isDirect() && out.isDirect()) {
+ directDecompress(in, out);
+ return;
+ }
+
Inflater inflater = new Inflater(true);
inflater.setInput(in.array(), in.arrayOffset() + in.position(),
in.remaining());
@@ -74,4 +87,30 @@ public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
in.position(in.limit());
}
+ @Override
+ public boolean isAvailable() {
+ if (direct == null) {
+ // see nowrap option in new Inflater(boolean) which disables zlib headers
+ try {
+ if (ShimLoader.getHadoopShims().getDirectDecompressor(
+ DirectCompressionType.ZLIB_NOHEADER) != null) {
+ direct = Boolean.valueOf(true);
+ } else {
+ direct = Boolean.valueOf(false);
+ }
+ } catch (UnsatisfiedLinkError ule) {
+ direct = Boolean.valueOf(false);
+ }
+ }
+ return direct.booleanValue();
+ }
+
+ @Override
+ public void directDecompress(ByteBuffer in, ByteBuffer out)
+ throws IOException {
+ DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims()
+ .getDirectDecompressor(DirectCompressionType.ZLIB_NOHEADER);
+ decompressShim.decompress(in, out);
+ out.flip(); // flip for read
+ }
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index d2aa220..a6b0a56 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1253,7 +1253,7 @@ public static void createMRWorkForMergingFiles (FileSinkOperator fsInput,
} else {
cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
- work = new TezWork();
+ work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Merge");
((TezWork)work).add(cplan);
} else {
@@ -1622,6 +1622,13 @@ public static boolean isMergeRequired(List<Task<MoveWork>> mvTasks, HiveConf hco
}
if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) {
+
+ if (currTask.getWork() instanceof TezWork) {
+ // tez blurs the boundary between map and reduce, thus it has its own
+ // config
+ return hconf.getBoolVar(ConfVars.HIVEMERGETEZFILES);
+ }
+
if (fsOp.getConf().isLinkedFileSink()) {
// If the user has HIVEMERGEMAPREDFILES set to false, the idea was the
// number of reducers are few, so the number of files anyway are small.
@@ -1635,16 +1642,13 @@ public static boolean isMergeRequired(List<Task<MoveWork>> mvTasks, HiveConf hco
// There are separate configuration parameters to control whether to
// merge for a map-only job
// or for a map-reduce job
- if (currTask.getWork() instanceof TezWork) {
- return hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) ||
- hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES);
- } else if (currTask.getWork() instanceof MapredWork) {
+ if (currTask.getWork() instanceof MapredWork) {
ReduceWork reduceWork = ((MapredWork) currTask.getWork()).getReduceWork();
boolean mergeMapOnly =
- hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null;
+ hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null;
boolean mergeMapRed =
- hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) &&
- reduceWork != null;
+ hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) &&
+ reduceWork != null;
if (mergeMapOnly || mergeMapRed) {
return true;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java
index 9592992..30c39db 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/FileSinkProcessor.java
@@ -51,28 +51,9 @@ public Object process(Node nd, Stack<Node> stack,
GenTezProcContext context = (GenTezProcContext) procCtx;
FileSinkOperator fileSink = (FileSinkOperator) nd;
- ParseContext parseContext = context.parseContext;
-
-
- boolean isInsertTable = // is INSERT OVERWRITE TABLE
- GenMapRedUtils.isInsertInto(parseContext, fileSink);
- HiveConf hconf = parseContext.getConf();
-
- boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask,
- hconf, fileSink, context.currentTask, isInsertTable);
-
- Path finalName = GenMapRedUtils.createMoveTask(context.currentTask,
- chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask);
-
- if (chDir) {
- // Merge the files in the destination table/partitions by creating Map-only merge job
- // If underlying data is RCFile a RCFileBlockMerge task would be created.
- LOG.info("using CombineHiveInputformat for the merge job");
- GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName,
- context.dependencyTask, context.moveTask,
- hconf, context.currentTask);
- }
-
+
+ // just remember it for later processing
+ context.fileSinkSet.add(fileSink);
return true;
}
}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java
index f4b6016..7581e16 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java
@@ -20,14 +20,19 @@
import java.io.Serializable;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -39,6 +44,7 @@
import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
/**
@@ -105,6 +111,15 @@
// used to group dependent tasks for multi table inserts
public final DependencyCollectionTask dependencyTask;
+ // used to hook up unions
+ public final Map<Operator<?>, BaseWork> unionWorkMap;
+ public final List<UnionOperator> currentUnionOperators;
+ public final Set<BaseWork> workWithUnionOperators;
+
+ // we link file sinks that will write to the same final location
+ public final Map<Path, List<FileSinkDesc>> linkedFileSinks;
+ public final Set<FileSinkOperator> fileSinkSet;
+
@SuppressWarnings("unchecked")
public GenTezProcContext(HiveConf conf, ParseContext parseContext,
List<Task<MoveWork>> moveTask, List<Task<? extends Serializable>> rootTasks,
@@ -116,7 +131,8 @@ public GenTezProcContext(HiveConf conf, ParseContext parseContext,
this.rootTasks = rootTasks;
this.inputs = inputs;
this.outputs = outputs;
- this.currentTask = (TezTask) TaskFactory.get(new TezWork(), conf);
+ this.currentTask = (TezTask) TaskFactory.get(
+ new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), conf);
this.leafOperatorToFollowingWork = new HashMap<Operator<?>, BaseWork>();
this.linkOpWithWorkMap = new HashMap<Operator<?>, List<BaseWork>>();
this.linkWorkWithReduceSinkMap = new HashMap<BaseWork, List<ReduceSinkOperator>>();
@@ -126,6 +142,11 @@ public GenTezProcContext(HiveConf conf, ParseContext parseContext,
this.linkChildOpWithDummyOp = new HashMap<Operator<?>, List<Operator<?>>>();
this.dependencyTask = (DependencyCollectionTask)
TaskFactory.get(new DependencyCollectionWork(), conf);
+ this.unionWorkMap = new HashMap<Operator<?>, BaseWork>();
+ this.currentUnionOperators = new LinkedList<UnionOperator>();
+ this.workWithUnionOperators = new HashSet<BaseWork>();
+ this.linkedFileSinks = new HashMap<Path, List<FileSinkDesc>>();
+ this.fileSinkSet = new HashSet<FileSinkOperator>();
rootTasks.add(currentTask);
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 042cb39..d9b5977 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -18,17 +18,35 @@
package org.apache.hadoop.hive.ql.parse;
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.fs.Path;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.UnionWork;
import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType;
/**
@@ -59,6 +77,13 @@ public void resetSequenceNumber() {
sequenceNumber = 0;
}
+ public UnionWork createUnionWork(GenTezProcContext context, Operator<?> operator, TezWork tezWork) {
+ UnionWork unionWork = new UnionWork("Union "+ (++sequenceNumber));
+ context.unionWorkMap.put(operator, unionWork);
+ tezWork.add(unionWork);
+ return unionWork;
+ }
+
public ReduceWork createReduceWork(GenTezProcContext context, Operator<?> root, TezWork tezWork) {
assert !root.getParentOperators().isEmpty();
ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber));
@@ -128,4 +153,111 @@ protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
GenMapRedUtils.setMapWork(mapWork, context.parseContext,
context.inputs, partitions, root, alias, context.conf, false);
}
+
+ // removes any union operator and clones the plan
+ public void removeUnionOperators(Configuration conf, GenTezProcContext context,
+ BaseWork work)
+ throws SemanticException {
+
+ Set<Operator<?>> roots = work.getAllRootOperators();
+
+ // need to clone the plan.
+ Set<Operator<?>> newRoots = Utilities.cloneOperatorTree(conf, roots);
+
+ Map<Operator<?>, Operator<?>> replacementMap = new HashMap<Operator<?>, Operator<?>>();
+
+ Iterator<Operator<?>> it = newRoots.iterator();
+ for (Operator<?> orig: roots) {
+ replacementMap.put(orig,it.next());
+ }
+
+ // now we remove all the unions. we throw away any branch that's not reachable from
+ // the current set of roots. The reason is that those branches will be handled in
+ // different tasks.
+ Deque<Operator<?>> operators = new LinkedList<Operator<?>>();
+ operators.addAll(newRoots);
+
+ Set<Operator<?>> seen = new HashSet<Operator<?>>();
+
+ while(!operators.isEmpty()) {
+ Operator<?> current = operators.pop();
+ seen.add(current);
+
+ if (current instanceof FileSinkOperator) {
+ FileSinkOperator fileSink = (FileSinkOperator)current;
+
+ // remember it for additional processing later
+ context.fileSinkSet.add(fileSink);
+
+ FileSinkDesc desc = fileSink.getConf();
+ Path path = desc.getDirName();
+ List<FileSinkDesc> linked;
+
+ if (!context.linkedFileSinks.containsKey(path)) {
+ linked = new ArrayList<FileSinkDesc>();
+ context.linkedFileSinks.put(path, linked);
+ }
+ linked = context.linkedFileSinks.get(path);
+ linked.add(desc);
+
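+ // each branch of the union writes to its own numbered subdirectory under the
+ // original target; the sibling descriptors are linked so later processing can find them all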
+ desc.setDirName(new Path(path, ""+linked.size()));
+ desc.setLinkedFileSinkDesc(linked);
+ }
+
+ if (current instanceof UnionOperator) {
+ Operator<?> parent = null;
+ int count = 0;
+
+ for (Operator<?> op: current.getParentOperators()) {
+ if (seen.contains(op)) {
+ ++count;
+ parent = op;
+ }
+ }
+
+ // we should have been able to reach the union from only one side.
+ assert count <= 1;
+
+ if (parent == null) {
+ // root operator is union (can happen in reducers)
+ replacementMap.put(current, current.getChildOperators().get(0));
+ } else {
+ parent.removeChildAndAdoptItsChildren(current);
+ }
+ }
+
+ if (current instanceof FileSinkOperator
+ || current instanceof ReduceSinkOperator) {
+ current.setChildOperators(null);
+ } else {
+ operators.addAll(current.getChildOperators());
+ }
+ }
+ work.replaceRoots(replacementMap);
+ }
+
+ public void processFileSink(GenTezProcContext context, FileSinkOperator fileSink)
+ throws SemanticException {
+
+ ParseContext parseContext = context.parseContext;
+
+ boolean isInsertTable = // is INSERT OVERWRITE TABLE
+ GenMapRedUtils.isInsertInto(parseContext, fileSink);
+ HiveConf hconf = parseContext.getConf();
+
+ boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask,
+ hconf, fileSink, context.currentTask, isInsertTable);
+
+ Path finalName = GenMapRedUtils.createMoveTask(context.currentTask,
+ chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask);
+
+ if (chDir) {
+ // Merge the files in the destination table/partitions by creating Map-only merge job
+ // If underlying data is RCFile a RCFileBlockMerge task would be created.
+ LOG.info("using CombineHiveInputformat for the merge job");
+ GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName,
+ context.dependencyTask, context.moveTask,
+ hconf, context.currentTask);
+ }
+ }
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
index 475c940..a6c30a3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
@@ -19,15 +19,21 @@
package org.apache.hadoop.hive.ql.parse;
import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -37,6 +43,7 @@
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.UnionWork;
import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType;
/**
@@ -106,6 +113,41 @@ public Object process(Node nd, Stack<Node> stack,
context.rootToWorkMap.put(root, work);
}
+ // This is where we cut the tree as described above. We also remember that
+ // we might have to connect parent work with this work later.
+ for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
+ context.leafOperatorToFollowingWork.put(parent, work);
+ LOG.debug("Removing " + parent + " as parent from " + root);
+ root.removeParent(parent);
+ }
+
+ if (!context.currentUnionOperators.isEmpty()) {
+ // if there are union all operators we need to add the work to the set
+ // of union operators.
+
+ UnionWork unionWork;
+ if (context.unionWorkMap.containsKey(operator)) {
+ // we've seen this terminal before and have created a union work object.
+ // just need to add this work to it. There will be no children of this one
+ // since we've passed this operator before.
+ assert operator.getChildOperators().isEmpty();
+ unionWork = (UnionWork) context.unionWorkMap.get(operator);
+
+ } else {
+ // first time through. we need to create a union work object and add this
+ // work to it. Subsequent work should reference the union and not the actual
+ // work.
+ unionWork = utils.createUnionWork(context, operator, tezWork);
+ }
+
+ // finally hook everything up
+ tezWork.connect(unionWork, work, EdgeType.CONTAINS);
+ unionWork.addUnionOperators(context.currentUnionOperators);
+ context.currentUnionOperators.clear();
+ context.workWithUnionOperators.add(work);
+ work = unionWork;
+ }
+
// We're scanning a tree from roots to leaf (this is not technically
// correct, demux and mux operators might form a diamond shape, but
// we will only scan one path and ignore the others, because the
@@ -134,16 +176,10 @@ public Object process(Node nd, Stack<Node> stack,
// remember the output name of the reduce sink
rs.getConf().setOutputName(rWork.getName());
- // add dependency between the two work items
- tezWork.connect(work, rWork, EdgeType.SIMPLE_EDGE);
- }
-
- // This is where we cut the tree as described above. We also remember that
- // we might have to connect parent work with this work later.
- for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
- context.leafOperatorToFollowingWork.put(parent, work);
- LOG.debug("Removing " + parent + " as parent from " + root);
- root.removeParent(parent);
+ if (!context.unionWorkMap.containsKey(operator)) {
+ // add dependency between the two work items
+ tezWork.connect(work, rWork, EdgeType.SIMPLE_EDGE);
+ }
}
// No children means we're at the bottom. If there are more operators to scan
@@ -182,7 +218,7 @@ public Object process(Node nd, Stack<Node> stack,
for (BaseWork parentWork : linkWorkList) {
tezWork.connect(parentWork, work, EdgeType.BROADCAST_EDGE);
- // need to set up output name for reduce sink not that we know the name
+ // need to set up output name for reduce sink now that we know the name
// of the downstream work
for (ReduceSinkOperator r:
context.linkWorkWithReduceSinkMap.get(parentWork)) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index a01aa0e..c704b77 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -644,7 +644,9 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException {
}
private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
- String inputFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
+ String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
message + " sampling is not supported in " + inputFormat));
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index b7738c5..a5e6cbf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -31,6 +31,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -53,16 +54,19 @@
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
+import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
/**
* TezCompiler translates the operator plan into TezTasks.
@@ -75,6 +79,18 @@ public TezCompiler() {
}
@Override
+ public void init(HiveConf conf, LogHelper console, Hive db) {
+ super.init(conf, console, db);
+
+ // Tez requires us to use RPC for the query plan
+ HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
+
+ // We require the use of recursive input dirs for union processing
+ conf.setBoolean("mapred.input.dir.recursive", true);
+ HiveConf.setBoolVar(conf, ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true);
+ }
+
+ @Override
protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs,
Set<WriteEntity> outputs) throws SemanticException {
@@ -138,14 +154,18 @@ protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, Pa
TableScanOperator.getOperatorName() + "%"),
new ProcessAnalyzeTable(GenTezUtils.getUtils()));
- opRules.put(new RuleRegExp("Bail on Union",
+ opRules.put(new RuleRegExp("Handle union",
UnionOperator.getOperatorName() + "%"), new NodeProcessor()
{
@Override
public Object process(Node n, Stack<Node> s,
NodeProcessorCtx procCtx, Object... os) throws SemanticException {
- throw new SemanticException("Unions not yet supported on Tez."
- +" Please use MR for this query");
+ GenTezProcContext context = (GenTezProcContext) procCtx;
+ UnionOperator union = (UnionOperator) n;
+
+ // simply need to remember that we've seen a union.
+ context.currentUnionOperators.add(union);
+ return null;
}
});
@@ -156,20 +176,31 @@ public Object process(Node n, Stack<Node> s,
topNodes.addAll(pCtx.getTopOps().values());
GraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
ogw.startWalking(topNodes, null);
+
+ // we still need to clone some operator plans and remove union operators
+ for (BaseWork w: procCtx.workWithUnionOperators) {
+ GenTezUtils.getUtils().removeUnionOperators(conf, procCtx, w);
+ }
+
+ // finally make sure the file sink operators are set up right
+ for (FileSinkOperator fileSink: procCtx.fileSinkSet) {
+ GenTezUtils.getUtils().processFileSink(procCtx, fileSink);
+ }
}
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
if (task instanceof TezTask) {
TezWork work = ((TezTask)task).getWork();
- Set<BaseWork> roots = work.getRoots();
- for (BaseWork w: roots) {
- assert w instanceof MapWork;
- MapWork mapWork = (MapWork)w;
- HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
- if (!opMap.isEmpty()) {
- for (Operator<? extends OperatorDesc> op : opMap.values()) {
- setInputFormat(mapWork, op);
+ List<BaseWork> all = work.getAllWork();
+ for (BaseWork w: all) {
+ if (w instanceof MapWork) {
+ MapWork mapWork = (MapWork) w;
+ HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
+ if (!opMap.isEmpty()) {
+ for (Operator<? extends OperatorDesc> op : opMap.values()) {
+ setInputFormat(mapWork, op);
+ }
}
}
}
@@ -217,6 +248,9 @@ protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, Pa
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
(new Vectorizer()).resolve(physicalCtx);
}
+ if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
+ (new StageIDsRearranger()).resolve(physicalCtx);
+ }
return;
}
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
index eb85446..38c4c11 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
@@ -22,6 +22,7 @@
import java.util.LinkedList;
import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.Stack;
@@ -82,7 +83,9 @@ public void addDummyOp(HashTableDummyOperator dummyOp) {
dummyOps.add(dummyOp);
}
- protected abstract Set<Operator<?>> getAllRootOperators();
+ public abstract void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap);
+
+ public abstract Set<Operator<?>> getAllRootOperators();
public Set> getAllOperators() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
index 19b553f..e1cc3f4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
@@ -305,6 +305,17 @@ public String getVectorModeOn() {
}
@Override
+ public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
+ LinkedHashMap<String, Operator<? extends OperatorDesc>> newAliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
+
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> entry: aliasToWork.entrySet()) {
+ newAliasToWork.put(entry.getKey(), replacementMap.get(entry.getValue()));
+ }
+
+ setAliasToWork(newAliasToWork);
+ }
+
+ @Override
@Explain(displayName = "Map Operator Tree")
public Set<Operator<?>> getAllRootOperators() {
Set<Operator<?>> opSet = new LinkedHashSet<Operator<?>>();
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
index afb3648..a68374e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
@@ -129,7 +129,13 @@ public void setTagToInput(final Map<Integer, String> tagToInput) {
}
@Override
- protected Set<Operator<?>> getAllRootOperators() {
+ public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
+ assert replacementMap.size() == 1;
+ setReducer(replacementMap.get(getReducer()));
+ }
+
+ @Override
+ public Set<Operator<?>> getAllRootOperators() {
Set<Operator<?>> opSet = new LinkedHashSet<Operator<?>>();
opSet.add(getReducer());
return opSet;
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
index 9112a77..f974c57 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
@@ -45,11 +45,14 @@
public enum EdgeType {
SIMPLE_EDGE,
- BROADCAST_EDGE
+ BROADCAST_EDGE,
+ CONTAINS
}
private static transient final Log LOG = LogFactory.getLog(TezWork.class);
+ private static int counter;
+ private final String name;
private final Set<BaseWork> roots = new HashSet<BaseWork>();
private final Set<BaseWork> leaves = new HashSet<BaseWork>();
private final Map<BaseWork, List<BaseWork>> workGraph = new HashMap<BaseWork, List<BaseWork>>();
@@ -57,6 +60,15 @@
private final Map<Pair<BaseWork, BaseWork>, EdgeType> edgeProperties =
new HashMap<Pair<BaseWork, BaseWork>, EdgeType>();
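+ // the DAG name is the caller-supplied name (the query id at the call sites in this patch)
+ // plus a static counter, so repeated DAGs from the same query get distinct names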
+ public TezWork(String name) {
+ this.name = name + ":" + (++counter);
+ }
+
+ @Explain(displayName = "DagName")
+ public String getName() {
+ return name;
+ }
+
/**
* getWorkMap returns a map of "vertex name" to BaseWork
*/
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java
new file mode 100644
index 0000000..60781e6
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+
+/**
+ * Simple wrapper for union all cases. All contributing work for a union all
+ * is collected here. Downstream work will connect to the union, not to the individual
+ * work.
+ */
+public class UnionWork extends BaseWork {
+
+ private final Set<UnionOperator> unionOperators = new HashSet<UnionOperator>();
+
+ public UnionWork() {
+ super();
+ }
+
+ public UnionWork(String name) {
+ super(name);
+ }
+
+ @Explain(displayName = "Vertex")
+ @Override
+ public String getName() {
+ return super.getName();
+ }
+
+ @Override
+ public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
+ }
+
+ @Override
+ public Set<Operator<?>> getAllRootOperators() {
+ return new HashSet<Operator<?>>();
+ }
+
+ public void addUnionOperators(Collection<UnionOperator> unions) {
+ unionOperators.addAll(unions);
+ }
+
+ public Set<UnionOperator> getUnionOperators() {
+ return unionOperators;
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
index 544fd82..859b5ad 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
@@ -112,7 +112,7 @@ public Edge answer(InvocationOnMock invocation) throws Throwable {
}
});
- work = new TezWork();
+ work = new TezWork("");
mws = new MapWork[] { new MapWork(), new MapWork()};
rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };
@@ -194,7 +194,7 @@ public void testBuildDag() throws IllegalArgumentException, IOException, Excepti
@Test
public void testEmptyWork() throws IllegalArgumentException, IOException, Exception {
- DAG dag = task.build(conf, new TezWork(), path, appLr, new Context(conf));
+ DAG dag = task.build(conf, new TezWork(""), path, appLr, new Context(conf));
assertEquals(dag.getVertices().size(), 0);
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 4d3013d..cc8e73a 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -29,6 +29,8 @@
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -39,6 +41,9 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_ZEROCOPY;
+
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -67,14 +72,19 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
/**
* Tests for the top level reader/streamFactory of ORC files.
*/
+@RunWith(value = Parameterized.class)
public class TestOrcFile {
public static class SimpleStruct {
@@ -191,6 +201,16 @@ private static ByteBuffer byteBuf(int... items) {
Configuration conf;
FileSystem fs;
Path testFilePath;
+ private final boolean zeroCopy;
+
+ @Parameters
+ public static Collection<Boolean[]> data() {
+ return Arrays.asList(new Boolean[][] { {false}, {true}});
+ }
+
+ public TestOrcFile(Boolean zcr) {
+ zeroCopy = zcr.booleanValue();
+ }
@Rule
public TestName testCaseName = new TestName();
@@ -198,6 +218,9 @@ private static ByteBuffer byteBuf(int... items) {
@Before
public void openFileSystem () throws Exception {
conf = new Configuration();
+ if(zeroCopy) {
+ conf.setBoolean(HIVE_ORC_ZEROCOPY.varname, zeroCopy);
+ }
fs = FileSystem.getLocal(conf);
testFilePath = new Path(workDir, "TestOrcFile." +
testCaseName.getMethodName() + ".orc");
@@ -547,6 +570,7 @@ public void testStripeLevelStats() throws Exception {
inspector = ObjectInspectorFactory.getReflectionObjectInspector
(InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
+
Writer writer = OrcFile.createWriter(testFilePath,
OrcFile.writerOptions(conf)
.inspector(inspector)
@@ -572,35 +596,36 @@ public void testStripeLevelStats() throws Exception {
StripeStatistics ss1 = metadata.getStripeStatistics().get(0);
StripeStatistics ss2 = metadata.getStripeStatistics().get(1);
StripeStatistics ss3 = metadata.getStripeStatistics().get(2);
- assertEquals(4996, ss1.getColumnStatistics()[0].getNumberOfValues());
+
+ assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues());
assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
- assertEquals(1004, ss3.getColumnStatistics()[0].getNumberOfValues());
+ assertEquals(1000, ss3.getColumnStatistics()[0].getNumberOfValues());
- assertEquals(4996, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getNumberOfValues());
+ assertEquals(5000, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getNumberOfValues());
assertEquals(5000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getNumberOfValues());
- assertEquals(1004, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getNumberOfValues());
+ assertEquals(1000, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getNumberOfValues());
assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMinimum());
- assertEquals(1, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum());
- assertEquals(2, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum());
+ assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum());
+ assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum());
assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMaximum());
assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMaximum());
assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMaximum());
- assertEquals(4996, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum());
- assertEquals(9996, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum());
- assertEquals(3008, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum());
+ assertEquals(5000, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum());
+ assertEquals(10000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum());
+ assertEquals(3000, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum());
- assertEquals(4996, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getNumberOfValues());
+ assertEquals(5000, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getNumberOfValues());
assertEquals(5000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getNumberOfValues());
- assertEquals(1004, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getNumberOfValues());
+ assertEquals(1000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getNumberOfValues());
assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMinimum());
- assertEquals("one", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum());
+ assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum());
assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMinimum());
assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMaximum());
assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMaximum());
- assertEquals("two", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum());
- assertEquals(14988, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum());
+ assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum());
+ assertEquals(15000, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum());
assertEquals(15000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getSum());
- assertEquals(5012, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
+ assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows(null);
OrcProto.RowIndex[] index = recordReader.readRowIndex(0);
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
index 251cd83..5581ad3 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
@@ -262,7 +262,7 @@ public static void main(String[] args) throws Exception {
bit.testCompressedSeek();
bit.testBiggerItems();
bit.testSkips();
- TestOrcFile test1 = new TestOrcFile();
+ TestOrcFile test1 = new TestOrcFile(false);
test1.test1();
test1.emptyFile();
test1.metaData();
diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java
index a11d418..d57a64c 100644
--- ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java
+++ ql/src/test/org/apache/hadoop/hive/ql/plan/TestTezWork.java
@@ -34,7 +34,7 @@
@Before
public void setup() throws Exception {
nodes = new LinkedList();
- work = new TezWork();
+ work = new TezWork("");
addWork(5);
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
index d4e737f..c51ff09 100644
--- ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
+++ ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
@@ -20,20 +20,43 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
+import java.util.Arrays;
+import java.util.Collection;
+
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.junit.Before;
import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
/**
* Test SessionState
*/
+@RunWith(value = Parameterized.class)
public class TestSessionState {
+ private final boolean prewarm;
+
+ public TestSessionState(Boolean mode) {
+ this.prewarm = mode.booleanValue();
+ }
+
+ @Parameters
+ public static Collection<Boolean[]> data() {
+ return Arrays.asList(new Boolean[][] { {false}, {true}});
+ }
@Before
- public void setup(){
- SessionState.start(new HiveConf());
+ public void setup() {
+ HiveConf conf = new HiveConf();
+ if (prewarm) {
+ HiveConf.setBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED, true);
+ HiveConf.setIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS, 1);
+ }
+ SessionState.start(conf);
}
/**
diff --git ql/src/test/results/clientpositive/tez/auto_join0.q.out ql/src/test/results/clientpositive/tez/auto_join0.q.out
index 13abf58..31cf7f3 100644
--- ql/src/test/results/clientpositive/tez/auto_join0.q.out
+++ ql/src/test/results/clientpositive/tez/auto_join0.q.out
@@ -31,6 +31,7 @@ STAGE PLANS:
Map 1 <- Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/auto_join1.q.out ql/src/test/results/clientpositive/tez/auto_join1.q.out
index e591d6a..fdb4f4e 100644
--- ql/src/test/results/clientpositive/tez/auto_join1.q.out
+++ ql/src/test/results/clientpositive/tez/auto_join1.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest_j1
PREHOOK: query: explain
FROM src src1 JOIN src src2 ON (src1.key = src2.key)
@@ -13,20 +15,16 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -68,15 +66,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -93,40 +82,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/bucket2.q.out ql/src/test/results/clientpositive/tez/bucket2.q.out
index 74bad32..d36557d 100644
--- ql/src/test/results/clientpositive/tez/bucket2.q.out
+++ ql/src/test/results/clientpositive/tez/bucket2.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket2_1
PREHOOK: query: explain extended
insert overwrite table bucket2_1
@@ -39,6 +41,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -196,6 +199,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/bucket3.q.out ql/src/test/results/clientpositive/tez/bucket3.q.out
index 69d838e..8f0d409 100644
--- ql/src/test/results/clientpositive/tez/bucket3.q.out
+++ ql/src/test/results/clientpositive/tez/bucket3.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE bucket3_1(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE bucket3_1(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket3_1
PREHOOK: query: explain extended
insert overwrite table bucket3_1 partition (ds='1')
@@ -43,6 +45,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -221,6 +224,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/bucket4.q.out ql/src/test/results/clientpositive/tez/bucket4.q.out
index 3092e21..8c90dc2 100644
--- ql/src/test/results/clientpositive/tez/bucket4.q.out
+++ ql/src/test/results/clientpositive/tez/bucket4.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket4_1
PREHOOK: query: explain extended
insert overwrite table bucket4_1
@@ -39,6 +41,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -197,6 +200,7 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/count.q.out ql/src/test/results/clientpositive/tez/count.q.out
index 4f21fcd..9d9922d 100644
--- ql/src/test/results/clientpositive/tez/count.q.out
+++ ql/src/test/results/clientpositive/tez/count.q.out
@@ -1,13 +1,17 @@
PREHOOK: query: create table abcd (a int, b int, c int, d int)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table abcd (a int, b int, c int, d int)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@abcd
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@abcd
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@abcd
PREHOOK: query: select * from abcd
PREHOOK: type: QUERY
@@ -37,6 +41,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -108,6 +113,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -174,6 +180,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -239,6 +246,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out
index 4340878..078ad7f 100644
--- ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out
+++ ql/src/test/results/clientpositive/tez/create_merge_compressed.q.out
@@ -1,18 +1,24 @@
PREHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@src_rc_merge_test
PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@src_rc_merge_test
POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@src_rc_merge_test
PREHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@tgt_rc_merge_test
PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/cross_join.q.out ql/src/test/results/clientpositive/tez/cross_join.q.out
index 4f5cea1..16b9671 100644
--- ql/src/test/results/clientpositive/tez/cross_join.q.out
+++ ql/src/test/results/clientpositive/tez/cross_join.q.out
@@ -13,6 +13,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -72,6 +73,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -131,6 +133,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/ctas.q.out ql/src/test/results/clientpositive/tez/ctas.q.out
index 24c810c..aaac91c 100644
--- ql/src/test/results/clientpositive/tez/ctas.q.out
+++ ql/src/test/results/clientpositive/tez/ctas.q.out
@@ -2,10 +2,12 @@ PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
create table nzhang_Tmp(a int, b string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
create table nzhang_Tmp(a int, b string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@nzhang_Tmp
PREHOOK: query: select * from nzhang_Tmp
PREHOOK: type: QUERY
@@ -21,15 +23,10 @@ POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from s
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-9 depends on stages: Stage-2, Stage-0
- Stage-3 depends on stages: Stage-9
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -37,6 +34,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -80,19 +78,10 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_CTAS1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
- Stage: Stage-9
+ Stage: Stage-4
Create Table Operator:
Create Table
columns: k string, value string
@@ -109,40 +98,6 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_CTAS1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_CTAS1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
@@ -208,15 +163,10 @@ POSTHOOK: query: explain create table nzhang_ctas2 as select * from src sort by
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-9 depends on stages: Stage-2, Stage-0
- Stage-3 depends on stages: Stage-9
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -224,6 +174,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -267,19 +218,10 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_ctas2
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
- Stage: Stage-9
+ Stage: Stage-4
Create Table Operator:
Create Table
columns: key string, value string
@@ -296,40 +238,6 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas2
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas2
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
@@ -395,15 +303,10 @@ POSTHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-9 depends on stages: Stage-2, Stage-0
- Stage-3 depends on stages: Stage-9
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -411,6 +314,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -454,19 +358,10 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.nzhang_ctas3
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
- Stage: Stage-9
+ Stage: Stage-4
Create Table Operator:
Create Table
columns: half_key double, conb string
@@ -484,18 +379,6 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
- Stage: Stage-4
- Block level merge
-
- Stage: Stage-6
- Block level merge
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
@@ -624,15 +507,10 @@ POSTHOOK: query: explain create table nzhang_ctas4 row format delimited fields t
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-9 depends on stages: Stage-2, Stage-0
- Stage-3 depends on stages: Stage-9
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -640,6 +518,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -683,19 +562,10 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_ctas4
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
- Stage: Stage-9
+ Stage: Stage-4
Create Table Operator:
Create Table
columns: key string, value string
@@ -713,40 +583,6 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas4
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas4
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
@@ -853,15 +689,10 @@ TOK_CREATETABLE
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-9 depends on stages: Stage-2, Stage-0
- Stage-3 depends on stages: Stage-9
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -869,6 +700,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -979,19 +811,10 @@ STAGE PLANS:
GatherStats: true
MultiFileSpray: false
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
- Stage: Stage-9
+ Stage: Stage-4
Create Table Operator:
Create Table
columns: key string, value string
@@ -1012,140 +835,6 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1
- columns.types string:string
- field.delim ,
- line.delim
-
- name default.nzhang_ctas5
- serialization.format ,
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas5
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1
- columns.types string:string
- field.delim ,
- line.delim
-
- name default.nzhang_ctas5
- serialization.format ,
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1
- columns.types string:string
- field.delim ,
- line.delim
-
- name default.nzhang_ctas5
- serialization.format ,
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas5
- name: default.nzhang_ctas5
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1
- columns.types string:string
- field.delim ,
- line.delim
-
- name default.nzhang_ctas5
- serialization.format ,
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas5
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1
- columns.types string:string
- field.delim ,
- line.delim
-
- name default.nzhang_ctas5
- serialization.format ,
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1
- columns.types string:string
- field.delim ,
- line.delim
-
- name default.nzhang_ctas5
- serialization.format ,
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_ctas5
- name: default.nzhang_ctas5
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
@@ -1155,8 +844,10 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@nzhang_ctas5
PREHOOK: query: create table nzhang_ctas6 (key string, `to` string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table nzhang_ctas6 (key string, `to` string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@nzhang_ctas6
PREHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src tablesample (10 rows)
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out
index 8f54c96..9dc6480 100644
--- ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out
+++ ql/src/test/results/clientpositive/tez/custom_input_output_format.q.out
@@ -2,10 +2,12 @@ PREHOOK: query: CREATE TABLE src1_rot13_iof(key STRING, value STRING)
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13InputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13OutputFormat'
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE src1_rot13_iof(key STRING, value STRING)
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13InputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13OutputFormat'
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@src1_rot13_iof
PREHOOK: query: DESCRIBE EXTENDED src1_rot13_iof
PREHOOK: type: DESCTABLE
diff --git ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
index 74bad32..d36557d 100644
--- ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
+++ ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket2_1
PREHOOK: query: explain extended
insert overwrite table bucket2_1
@@ -39,6 +41,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -196,6 +199,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/enforce_order.q.out ql/src/test/results/clientpositive/tez/enforce_order.q.out
index e870837..c14d3c9 100644
--- ql/src/test/results/clientpositive/tez/enforce_order.q.out
+++ ql/src/test/results/clientpositive/tez/enforce_order.q.out
@@ -8,13 +8,17 @@ POSTHOOK: query: drop table table_desc
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table table_asc(key string, value string) clustered by (key) sorted by (key ASC) into 1 BUCKETS
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table table_asc(key string, value string) clustered by (key) sorted by (key ASC) into 1 BUCKETS
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@table_asc
PREHOOK: query: create table table_desc(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table table_desc(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@table_desc
PREHOOK: query: insert overwrite table table_asc select key, value from src
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out
index bf6032d..17139e9 100644
--- ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out
+++ ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE filter_join_breaktask(key int, value string) partitioned by (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE filter_join_breaktask(key int, value string) partitioned by (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@filter_join_breaktask
PREHOOK: query: INSERT OVERWRITE TABLE filter_join_breaktask PARTITION(ds='2008-04-08')
SELECT key, value from src1
@@ -139,6 +141,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out
index 1583207..20b4589 100644
--- ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out
+++ ql/src/test/results/clientpositive/tez/filter_join_breaktask2.q.out
@@ -1,24 +1,32 @@
PREHOOK: query: create table T1(c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string)
partitioned by (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table T1(c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string)
partitioned by (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@T1
PREHOOK: query: create table T2(c1 string, c2 string, c3 string, c0 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string) partitioned by (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table T2(c1 string, c2 string, c3 string, c0 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string) partitioned by (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@T2
PREHOOK: query: create table T3 (c0 bigint, c1 bigint, c2 int) partitioned by (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table T3 (c0 bigint, c1 bigint, c2 int) partitioned by (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@T3
PREHOOK: query: create table T4 (c0 bigint, c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string, c26 string, c27 string, c28 string, c29 string, c30 string, c31 string, c32 string, c33 string, c34 string, c35 string, c36 string, c37 string, c38 string, c39 string, c40 string, c41 string, c42 string, c43 string, c44 string, c45 string, c46 string, c47 string, c48 string, c49 string, c50 string, c51 string, c52 string, c53 string, c54 string, c55 string, c56 string, c57 string, c58 string, c59 string, c60 string, c61 string, c62 string, c63 string, c64 string, c65 string, c66 string, c67 bigint, c68 string, c69 string, c70 bigint, c71 bigint, c72 bigint, c73 string, c74 string, c75 string, c76 string, c77 string, c78 string, c79 string, c80 string, c81 bigint, c82 bigint, c83 bigint) partitioned by (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table T4 (c0 bigint, c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string, c26 string, c27 string, c28 string, c29 string, c30 string, c31 string, c32 string, c33 string, c34 string, c35 string, c36 string, c37 string, c38 string, c39 string, c40 string, c41 string, c42 string, c43 string, c44 string, c45 string, c46 string, c47 string, c48 string, c49 string, c50 string, c51 string, c52 string, c53 string, c54 string, c55 string, c56 string, c57 string, c58 string, c59 string, c60 string, c61 string, c62 string, c63 string, c64 string, c65 string, c66 string, c67 bigint, c68 string, c69 string, c70 bigint, c71 bigint, c72 bigint, c73 string, c74 string, c75 string, c76 string, c77 string, c78 string, c79 string, c80 string, c81 bigint, c82 bigint, c83 bigint) partitioned by (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@T4
PREHOOK: query: insert overwrite table T1 partition (ds='2010-04-17') select '5', '1', '1', '1', 0, 0,4 from src tablesample (1 rows)
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/groupby1.q.out ql/src/test/results/clientpositive/tez/groupby1.q.out
index 06d46c6..c215835 100644
--- ql/src/test/results/clientpositive/tez/groupby1.q.out
+++ ql/src/test/results/clientpositive/tez/groupby1.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest_g1
PREHOOK: query: EXPLAIN
FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
@@ -11,14 +13,9 @@ FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -26,6 +23,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -77,15 +75,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -102,40 +91,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
diff --git ql/src/test/results/clientpositive/tez/groupby2.q.out ql/src/test/results/clientpositive/tez/groupby2.q.out
index 0836a02..1cf946f 100644
--- ql/src/test/results/clientpositive/tez/groupby2.q.out
+++ ql/src/test/results/clientpositive/tez/groupby2.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest_g2
PREHOOK: query: EXPLAIN
FROM src
@@ -13,20 +15,16 @@ INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -63,15 +61,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g2
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -88,40 +77,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g2
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g2
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/groupby3.q.out ql/src/test/results/clientpositive/tez/groupby3.q.out
index c4bb7bb..cb8f0e7 100644
--- ql/src/test/results/clientpositive/tez/groupby3.q.out
+++ ql/src/test/results/clientpositive/tez/groupby3.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
PREHOOK: query: EXPLAIN
FROM src
@@ -31,14 +33,9 @@ INSERT OVERWRITE TABLE dest1 SELECT
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -46,6 +43,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -92,15 +90,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -117,40 +106,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT
sum(substr(src.value,5)),
diff --git ql/src/test/results/clientpositive/tez/having.q.out ql/src/test/results/clientpositive/tez/having.q.out
index 99e990d..25d337b 100644
--- ql/src/test/results/clientpositive/tez/having.q.out
+++ ql/src/test/results/clientpositive/tez/having.q.out
@@ -11,6 +11,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -91,6 +92,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -469,6 +471,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -738,6 +741,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -936,6 +940,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/insert1.q.out ql/src/test/results/clientpositive/tez/insert1.q.out
index ef3e083..7b8819d 100644
--- ql/src/test/results/clientpositive/tez/insert1.q.out
+++ ql/src/test/results/clientpositive/tez/insert1.q.out
@@ -1,12 +1,16 @@
PREHOOK: query: create table insert1(key int, value string) stored as textfile
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table insert1(key int, value string) stored as textfile
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@insert1
PREHOOK: query: create table insert2(key int, value string) stored as textfile
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table insert2(key int, value string) stored as textfile
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@insert2
PREHOOK: query: insert overwrite table insert1 select a.key, a.value from insert2 a WHERE (a.key=-1)
PREHOOK: type: QUERY
@@ -26,18 +30,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -60,15 +60,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -85,40 +76,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1)
PREHOOK: type: QUERY
POSTHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1)
@@ -127,18 +84,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -161,15 +114,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -186,40 +130,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: -- HIVE-3465
create database x
PREHOOK: type: CREATEDATABASE
@@ -230,8 +140,10 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: create table x.insert1(key int, value string) stored as textfile
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:x
POSTHOOK: query: create table x.insert1(key int, value string) stored as textfile
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:x
POSTHOOK: Output: x@insert1
POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
@@ -243,18 +155,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -277,15 +185,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: x.insert1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -302,40 +201,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: x.insert1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: x.insert1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1)
PREHOOK: type: QUERY
POSTHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1)
@@ -344,18 +209,14 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -378,15 +239,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -403,40 +255,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: explain
from insert2
insert into table insert1 select * where key < 10
@@ -451,25 +269,16 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
- Stage-6
- Stage-3 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+ Stage-3 depends on stages: Stage-2
Stage-0 depends on stages: Stage-3
Stage-4 depends on stages: Stage-0
Stage-1 depends on stages: Stage-3
- Stage-10 depends on stages: Stage-1
- Stage-5
- Stage-7
- Stage-8 depends on stages: Stage-7
- Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
- Stage-12
- Stage-11
- Stage-13
- Stage-14 depends on stages: Stage-13
+ Stage-5 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-2
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -507,15 +316,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: x.insert1
- Stage: Stage-9
- Conditional Operator
-
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-3
Dependency Collection
@@ -542,85 +342,8 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: x.insert1
- Stage: Stage-10
- Stats-Aggr Operator
-
Stage: Stage-5
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-7
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert1
-
- Stage: Stage-8
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-15
- Conditional Operator
-
- Stage: Stage-12
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-11
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: x.insert1
-
- Stage: Stage-13
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: x.insert1
-
- Stage: Stage-14
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
+ Stats-Aggr Operator
PREHOOK: query: -- HIVE-3676
CREATE DATABASE db2
@@ -638,8 +361,10 @@ POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: CREATE TABLE result(col1 STRING)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:db2
POSTHOOK: query: CREATE TABLE result(col1 STRING)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:db2
POSTHOOK: Output: db2@result
POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
@@ -698,8 +423,10 @@ POSTHOOK: Lineage: result.col1 SIMPLE []
POSTHOOK: Lineage: result.col1 SIMPLE []
PREHOOK: query: CREATE TABLE db1.result(col1 STRING)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:db1
POSTHOOK: query: CREATE TABLE db1.result(col1 STRING)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:db1
POSTHOOK: Output: db1@result
POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ]
diff --git ql/src/test/results/clientpositive/tez/insert_into1.q.out ql/src/test/results/clientpositive/tez/insert_into1.q.out
index c5f50c1..1c6e992 100644
--- ql/src/test/results/clientpositive/tez/insert_into1.q.out
+++ ql/src/test/results/clientpositive/tez/insert_into1.q.out
@@ -4,8 +4,10 @@ POSTHOOK: query: DROP TABLE insert_into1
POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE TABLE insert_into1 (key int, value string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE insert_into1 (key int, value string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@insert_into1
PREHOOK: query: EXPLAIN INSERT INTO TABLE insert_into1 SELECT * from src LIMIT 100
PREHOOK: type: QUERY
@@ -13,20 +15,16 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into1 SELECT * from src LIMIT
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -64,15 +62,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert_into1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -89,40 +78,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src LIMIT 100
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -156,20 +111,16 @@ POSTHOOK: Lineage: insert_into1.key EXPRESSION [(src)src.FieldSchema(name:key, t
POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -207,15 +158,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert_into1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -232,40 +174,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src LIMIT 100
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -318,20 +226,16 @@ POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, t
POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -369,15 +273,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert_into1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -394,40 +289,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
diff --git ql/src/test/results/clientpositive/tez/insert_into2.q.out ql/src/test/results/clientpositive/tez/insert_into2.q.out
index 7e94cf3..5869b44 100644
--- ql/src/test/results/clientpositive/tez/insert_into2.q.out
+++ ql/src/test/results/clientpositive/tez/insert_into2.q.out
@@ -5,9 +5,11 @@ POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE TABLE insert_into2 (key int, value string)
PARTITIONED BY (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE insert_into2 (key int, value string)
PARTITIONED BY (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@insert_into2
PREHOOK: query: EXPLAIN INSERT INTO TABLE insert_into2 PARTITION (ds='1')
SELECT * FROM src LIMIT 100
@@ -17,20 +19,16 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into2 PARTITION (ds='1')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -68,15 +66,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert_into2
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -95,40 +84,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into2
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into2
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -197,20 +152,16 @@ POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSc
POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -248,15 +199,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert_into2
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -275,40 +217,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into2
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into2
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2')
SELECT * FROM src LIMIT 100
PREHOOK: type: QUERY
@@ -362,20 +270,16 @@ POSTHOOK: Lineage: insert_into2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSc
POSTHOOK: Lineage: insert_into2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -413,15 +317,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.insert_into2
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -440,40 +335,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into2
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_into2
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2')
SELECT * FROM src LIMIT 50
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/join0.q.out ql/src/test/results/clientpositive/tez/join0.q.out
index 8d0439d..e9fdced 100644
--- ql/src/test/results/clientpositive/tez/join0.q.out
+++ ql/src/test/results/clientpositive/tez/join0.q.out
@@ -24,6 +24,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -108,7 +109,7 @@ SELECT src1.key as k1, src1.value as v1,
(SELECT * FROM src WHERE src.key < 10) src2
SORT BY k1, v1, k2, v2
POSTHOOK: type: QUERY
-{"STAGE PLANS":{"Stage-1":{"Tez":{"Vertices:":{"Reducer 2":{"Reduce Operator Tree:":{"Join Operator":{"outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"Select Operator":{"expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"Reduce Output Operator":{"sort order:":"++++","value expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE","key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)"}},"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE","condition map:":[{"":"Inner Join 0 to 1"}],"condition expressions:":{"1":"{VALUE._col0} {VALUE._col1}","0":"{VALUE._col0} {VALUE._col1}"}}}},"Reducer 3":{"Reduce Operator Tree:":{"Extract":{"children":{"File Output Operator":{"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE"}}},"Map 1":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE"}}]},"Map 4":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE"}}]}},"Edges:":{"Reducer 2":[{"parent":"Map 1","type":"SIMPLE_EDGE"},{"parent":"Map 4","type":"SIMPLE_EDGE"}],"Reducer 3":{"parent":"Reducer 2","type":"SIMPLE_EDGE"}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}}}
+#### A masked pattern was here ####
PREHOOK: query: SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
(SELECT * FROM src WHERE src.key < 10) src1
diff --git ql/src/test/results/clientpositive/tez/join1.q.out ql/src/test/results/clientpositive/tez/join1.q.out
index 0a6690e..ca99d72 100644
--- ql/src/test/results/clientpositive/tez/join1.q.out
+++ ql/src/test/results/clientpositive/tez/join1.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest_j1
PREHOOK: query: EXPLAIN
FROM src src1 JOIN src src2 ON (src1.key = src2.key)
@@ -13,20 +15,16 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -73,15 +71,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -98,40 +87,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/leftsemijoin.q.out ql/src/test/results/clientpositive/tez/leftsemijoin.q.out
index d8ecfbf..c23c537 100644
--- ql/src/test/results/clientpositive/tez/leftsemijoin.q.out
+++ ql/src/test/results/clientpositive/tez/leftsemijoin.q.out
@@ -9,35 +9,45 @@ POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE TABLE sales (name STRING, id INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE sales (name STRING, id INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@sales
PREHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@things
PREHOOK: query: load data local inpath '../../data/files/sales.txt' INTO TABLE sales
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@sales
POSTHOOK: query: load data local inpath '../../data/files/sales.txt' INTO TABLE sales
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@sales
PREHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23')
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@things
POSTHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23')
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@things
POSTHOOK: Output: default@things@ds=2011-10-23
PREHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24')
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@things
POSTHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24')
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@things
POSTHOOK: Output: default@things@ds=2011-10-24
PREHOOK: query: SELECT name,id FROM sales ORDER BY name ASC, id ASC
diff --git ql/src/test/results/clientpositive/tez/limit_pushdown.q.out ql/src/test/results/clientpositive/tez/limit_pushdown.q.out
index 279b918..7d6b1c7 100644
--- ql/src/test/results/clientpositive/tez/limit_pushdown.q.out
+++ ql/src/test/results/clientpositive/tez/limit_pushdown.q.out
@@ -17,6 +17,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -95,6 +96,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -173,6 +175,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -268,6 +271,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -363,6 +367,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -453,6 +458,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -548,6 +554,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -643,6 +650,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -703,6 +711,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -818,6 +827,7 @@ STAGE PLANS:
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -963,6 +973,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -1052,6 +1063,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -1212,6 +1224,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out
index f68733d..d420caf 100644
--- ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out
+++ ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out
@@ -8,13 +8,17 @@ ds=2008-04-09/hr=11
ds=2008-04-09/hr=12
PREHOOK: query: create table if not exists nzhang_part1 like srcpart
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table if not exists nzhang_part1 like srcpart
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@nzhang_part1
PREHOOK: query: create table if not exists nzhang_part2 like srcpart
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table if not exists nzhang_part2 like srcpart
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@nzhang_part2
PREHOOK: query: describe extended nzhang_part1
PREHOOK: type: DESCTABLE
@@ -44,25 +48,16 @@ insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, v
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
- Stage-6
- Stage-3 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
- Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-2
Stage-1 depends on stages: Stage-3
- Stage-10 depends on stages: Stage-1
- Stage-5
- Stage-7
- Stage-8 depends on stages: Stage-7
- Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
- Stage-12
- Stage-11
- Stage-13
- Stage-14 depends on stages: Stage-13
+ Stage-4 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-2
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -100,126 +95,40 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part2
- Stage: Stage-9
- Conditional Operator
-
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-3
Dependency Collection
- Stage: Stage-0
+ Stage: Stage-1
Move Operator
tables:
partition:
- ds
+ ds 2008-12-31
hr
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_part1
+ name: default.nzhang_part2
Stage: Stage-4
Stats-Aggr Operator
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
partition:
- ds 2008-12-31
+ ds
hr
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_part2
-
- Stage: Stage-10
- Stats-Aggr Operator
+ name: default.nzhang_part1
Stage: Stage-5
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_part1
-
- Stage: Stage-7
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_part1
-
- Stage: Stage-8
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-15
- Conditional Operator
-
- Stage: Stage-12
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-11
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_part2
-
- Stage: Stage-13
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.nzhang_part2
-
- Stage: Stage-14
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
+ Stats-Aggr Operator
PREHOOK: query: from srcpart
insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08'
diff --git ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out
index 04445b5..34ca3c3 100644
--- ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out
+++ ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out
@@ -2,10 +2,12 @@ PREHOOK: query: create table if not exists nzhang_part_bucket (key string, value
partitioned by (ds string, hr string)
clustered by (key) into 10 buckets
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table if not exists nzhang_part_bucket (key string, value string)
partitioned by (ds string, hr string)
clustered by (key) into 10 buckets
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@nzhang_part_bucket
PREHOOK: query: describe extended nzhang_part_bucket
PREHOOK: type: DESCTABLE
@@ -40,6 +42,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out
index 7958f77..f992f55 100644
--- ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out
+++ ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out
@@ -8,8 +8,10 @@ ds=2008-04-09/hr=11
ds=2008-04-09/hr=12
PREHOOK: query: create table if not exists nzhang_part3 like srcpart
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table if not exists nzhang_part3 like srcpart
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@nzhang_part3
PREHOOK: query: describe extended nzhang_part3
PREHOOK: type: DESCTABLE
@@ -42,6 +44,7 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out
index 6b693f4..bc2c650 100644
--- ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out
+++ ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out
@@ -15,6 +15,7 @@ STAGE PLANS:
Tez
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -93,6 +94,7 @@ STAGE PLANS:
Edges:
Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/mapreduce1.q.out ql/src/test/results/clientpositive/tez/mapreduce1.q.out
index 05154eb..0a6142e 100644
--- ql/src/test/results/clientpositive/tez/mapreduce1.q.out
+++ ql/src/test/results/clientpositive/tez/mapreduce1.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
PREHOOK: query: EXPLAIN
FROM src
@@ -21,20 +23,16 @@ SORT BY ten, one
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -75,15 +73,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -100,40 +89,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value
diff --git ql/src/test/results/clientpositive/tez/mapreduce2.q.out ql/src/test/results/clientpositive/tez/mapreduce2.q.out
index 8cbf576..284b4dc 100644
--- ql/src/test/results/clientpositive/tez/mapreduce2.q.out
+++ ql/src/test/results/clientpositive/tez/mapreduce2.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest1(key INT, ten INT, one INT, value STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
PREHOOK: query: EXPLAIN
FROM src
@@ -19,20 +21,16 @@ DISTRIBUTE BY tvalue, tkey
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -72,15 +70,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -97,40 +86,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value
diff --git ql/src/test/results/clientpositive/tez/merge1.q.out ql/src/test/results/clientpositive/tez/merge1.q.out
index 9d17aae..ec46417 100644
--- ql/src/test/results/clientpositive/tez/merge1.q.out
+++ ql/src/test/results/clientpositive/tez/merge1.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: create table dest1(key int, val int)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table dest1(key int, val int)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
PREHOOK: query: explain
insert overwrite table dest1
@@ -13,20 +15,16 @@ select key, count(1) from src group by key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -70,15 +68,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -95,40 +84,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: insert overwrite table dest1
select key, count(1) from src group by key
PREHOOK: type: QUERY
@@ -472,15 +427,19 @@ POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str
POSTHOOK: Lineage: dest1.val EXPRESSION [(src)src.null, ]
PREHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_src
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.val EXPRESSION [(src)src.null, ]
PREHOOK: query: create table dest1(key string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table dest1(key string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.val EXPRESSION [(src)src.null, ]
@@ -524,18 +483,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n
POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -555,15 +510,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -580,40 +526,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: insert overwrite table dest1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
@@ -648,18 +560,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n
POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -679,15 +587,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -704,40 +603,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: insert overwrite table dest1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
diff --git ql/src/test/results/clientpositive/tez/merge2.q.out ql/src/test/results/clientpositive/tez/merge2.q.out
index a4f5c03..3a1f1af 100644
--- ql/src/test/results/clientpositive/tez/merge2.q.out
+++ ql/src/test/results/clientpositive/tez/merge2.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: create table test1(key int, val int)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table test1(key int, val int)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@test1
PREHOOK: query: explain
insert overwrite table test1
@@ -13,20 +15,16 @@ select key, count(1) from src group by key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -70,15 +68,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -95,40 +84,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: insert overwrite table test1
select key, count(1) from src group by key
PREHOOK: type: QUERY
@@ -472,15 +427,19 @@ POSTHOOK: Lineage: test1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str
POSTHOOK: Lineage: test1.val EXPRESSION [(src)src.null, ]
PREHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_src
POSTHOOK: Lineage: test1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test1.val EXPRESSION [(src)src.null, ]
PREHOOK: query: create table test1(key string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table test1(key string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@test1
POSTHOOK: Lineage: test1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test1.val EXPRESSION [(src)src.null, ]
@@ -524,18 +483,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n
POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -555,15 +510,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -580,40 +526,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: insert overwrite table test1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
@@ -648,18 +560,14 @@ POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(n
POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -679,15 +587,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test1
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -704,40 +603,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test1
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test1
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: insert overwrite table test1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
diff --git ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
index 883e71d..4c3e95c 100644
--- ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
+++ ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
@@ -13,6 +13,7 @@ PREHOOK: query: create table over10k(
row format delimited
fields terminated by '|'
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table over10k(
t tinyint,
si smallint,
@@ -28,12 +29,15 @@ POSTHOOK: query: create table over10k(
row format delimited
fields terminated by '|'
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@over10k
PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@over10k
POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@over10k
PREHOOK: query: create table stats_tbl(
t tinyint,
@@ -48,6 +52,7 @@ PREHOOK: query: create table stats_tbl(
dec decimal,
bin binary)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table stats_tbl(
t tinyint,
si smallint,
@@ -61,6 +66,7 @@ POSTHOOK: query: create table stats_tbl(
dec decimal,
bin binary)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@stats_tbl
PREHOOK: query: create table stats_tbl_part(
t tinyint,
@@ -75,6 +81,7 @@ PREHOOK: query: create table stats_tbl_part(
dec decimal,
bin binary) partitioned by (dt string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: create table stats_tbl_part(
t tinyint,
si smallint,
@@ -88,6 +95,7 @@ POSTHOOK: query: create table stats_tbl_part(
dec decimal,
bin binary) partitioned by (dt string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@stats_tbl_part
PREHOOK: query: insert overwrite table stats_tbl select * from over10k
PREHOOK: type: QUERY
@@ -290,6 +298,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -391,6 +400,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -922,6 +932,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/mrr.q.out ql/src/test/results/clientpositive/tez/mrr.q.out
index 3956446..8ec77eb 100644
--- ql/src/test/results/clientpositive/tez/mrr.q.out
+++ ql/src/test/results/clientpositive/tez/mrr.q.out
@@ -14,6 +14,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -403,6 +404,7 @@ STAGE PLANS:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -818,6 +820,7 @@ STAGE PLANS:
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -1262,6 +1265,7 @@ STAGE PLANS:
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
Reducer 7 <- Map 6 (SIMPLE_EDGE)
Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -1641,6 +1645,7 @@ STAGE PLANS:
Edges:
Map 1 <- Reducer 3 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/ptf.q.out ql/src/test/results/clientpositive/tez/ptf.q.out
index 1932b93..3bd2db2 100644
--- ql/src/test/results/clientpositive/tez/ptf.q.out
+++ ql/src/test/results/clientpositive/tez/ptf.q.out
@@ -15,6 +15,7 @@ CREATE TABLE part(
p_comment STRING
)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- data setup
CREATE TABLE part(
p_partkey INT,
@@ -28,12 +29,15 @@ CREATE TABLE part(
p_comment STRING
)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@part
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@part
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@part
PREHOOK: query: --1. test1
select p_mfgr, p_name, p_size,
@@ -821,6 +825,7 @@ r INT,
dr INT,
s DOUBLE)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- 17. testMultipleInserts2SWQsWithPTF
CREATE TABLE part_4(
p_mfgr STRING,
@@ -830,6 +835,7 @@ r INT,
dr INT,
s DOUBLE)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@part_4
PREHOOK: query: CREATE TABLE part_5(
p_mfgr STRING,
@@ -841,6 +847,7 @@ dr INT,
cud DOUBLE,
fv1 INT)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE part_5(
p_mfgr STRING,
p_name STRING,
@@ -851,6 +858,7 @@ dr INT,
cud DOUBLE,
fv1 INT)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@part_5
PREHOOK: query: from noop(on part
partition by p_mfgr
diff --git ql/src/test/results/clientpositive/tez/sample1.q.out ql/src/test/results/clientpositive/tez/sample1.q.out
index 92d031f..1b5cd68 100644
--- ql/src/test/results/clientpositive/tez/sample1.q.out
+++ ql/src/test/results/clientpositive/tez/sample1.q.out
@@ -1,7 +1,9 @@
PREHOOK: query: CREATE TABLE dest1(key INT, value STRING, dt STRING, hr STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING, dt STRING, hr STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
PREHOOK: query: -- no input pruning, no sample filter
EXPLAIN EXTENDED
@@ -56,18 +58,14 @@ TOK_QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -155,15 +153,6 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [s]
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -192,146 +181,6 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,dt,hr
- columns.types int:string:string:string
-#### A masked pattern was here ####
- name default.dest1
- serialization.ddl struct dest1 { i32 key, string value, string dt, string hr}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,dt,hr
- columns.types int:string:string:string
-#### A masked pattern was here ####
- name default.dest1
- serialization.ddl struct dest1 { i32 key, string value, string dt, string hr}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,dt,hr
- columns.types int:string:string:string
-#### A masked pattern was here ####
- name default.dest1
- serialization.ddl struct dest1 { i32 key, string value, string dt, string hr}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- name: default.dest1
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,dt,hr
- columns.types int:string:string:string
-#### A masked pattern was here ####
- name default.dest1
- serialization.ddl struct dest1 { i32 key, string value, string dt, string hr}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,dt,hr
- columns.types int:string:string:string
-#### A masked pattern was here ####
- name default.dest1
- serialization.ddl struct dest1 { i32 key, string value, string dt, string hr}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,dt,hr
- columns.types int:string:string:string
-#### A masked pattern was here ####
- name default.dest1
- serialization.ddl struct dest1 { i32 key, string value, string dt, string hr}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- name: default.dest1
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.*
FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s
WHERE s.ds='2008-04-08' and s.hr='11'
diff --git ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out
index 62daf36..b41b357 100644
--- ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out
+++ ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out
@@ -2,23 +2,29 @@ PREHOOK: query: -- partitioned table analyze
create table dummy (key string, value string) partitioned by (ds string, hr string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- partitioned table analyze
create table dummy (key string, value string) partitioned by (ds string, hr string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dummy
PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12')
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@dummy
POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12')
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@dummy
POSTHOOK: Output: default@dummy@ds=2008/hr=12
PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11')
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@dummy
POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11')
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@dummy
POSTHOOK: Output: default@dummy@ds=2008/hr=11
PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics
@@ -129,10 +135,12 @@ PREHOOK: query: -- static partitioned table on insert
create table dummy (key string, value string) partitioned by (ds string, hr string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- static partitioned table on insert
create table dummy (key string, value string) partitioned by (ds string, hr string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dummy
PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src
PREHOOK: type: QUERY
@@ -260,10 +268,12 @@ PREHOOK: query: -- dynamic partitioned table on insert
create table dummy (key int) partitioned by (hr int)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- dynamic partitioned table on insert
create table dummy (key int) partitioned by (hr int)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@dummy
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -271,8 +281,10 @@ POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -280,9 +292,11 @@ POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@tbl
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@tbl
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
diff --git ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out
index 0fb2ec6..9cbd7f7 100644
--- ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out
+++ ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out
@@ -3,11 +3,13 @@ PREHOOK: query: -- test analyze table ... compute statistics noscan
-- 1. test full spec
create table analyze_srcpart like srcpart
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- test analyze table ... compute statistics noscan
-- 1. test full spec
create table analyze_srcpart like srcpart
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@analyze_srcpart
PREHOOK: query: insert overwrite table analyze_srcpart partition (ds, hr) select * from srcpart where ds is not null
PREHOOK: type: QUERY
@@ -353,9 +355,11 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [
PREHOOK: query: -- 2. test partial spec
create table analyze_srcpart_partial like srcpart
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- 2. test partial spec
create table analyze_srcpart_partial like srcpart
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@analyze_srcpart_partial
POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
diff --git ql/src/test/results/clientpositive/tez/subquery_exists.q.out ql/src/test/results/clientpositive/tez/subquery_exists.q.out
index e7d6e39..9d7fa28 100644
--- ql/src/test/results/clientpositive/tez/subquery_exists.q.out
+++ ql/src/test/results/clientpositive/tez/subquery_exists.q.out
@@ -27,6 +27,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/subquery_in.q.out ql/src/test/results/clientpositive/tez/subquery_in.q.out
index 49787b6..c66602b 100644
--- ql/src/test/results/clientpositive/tez/subquery_in.q.out
+++ ql/src/test/results/clientpositive/tez/subquery_in.q.out
@@ -15,6 +15,7 @@ CREATE TABLE part(
p_comment STRING
)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- data setup
CREATE TABLE part(
p_partkey INT,
@@ -28,12 +29,15 @@ CREATE TABLE part(
p_comment STRING
)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@part
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@part
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@part
PREHOOK: query: DROP TABLE lineitem
PREHOOK: type: DROPTABLE
@@ -58,6 +62,7 @@ PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT,
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT,
L_PARTKEY INT,
L_SUPPKEY INT,
@@ -77,12 +82,15 @@ POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT,
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@lineitem
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
PREHOOK: type: LOAD
+#### A masked pattern was here ####
PREHOOK: Output: default@lineitem
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
POSTHOOK: type: LOAD
+#### A masked pattern was here ####
POSTHOOK: Output: default@lineitem
PREHOOK: query: -- non agg, non corr
explain
@@ -105,6 +113,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -220,6 +229,7 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -345,6 +355,7 @@ STAGE PLANS:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -491,6 +502,7 @@ STAGE PLANS:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -644,6 +656,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -806,6 +819,7 @@ STAGE PLANS:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/tez_dml.q.out ql/src/test/results/clientpositive/tez/tez_dml.q.out
index 9e9ef33..7c653d8 100644
--- ql/src/test/results/clientpositive/tez/tez_dml.q.out
+++ ql/src/test/results/clientpositive/tez/tez_dml.q.out
@@ -6,15 +6,10 @@ EXPLAIN CREATE TABLE tmp_src AS SELECT * FROM (SELECT value, count(value) AS cnt
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-9 depends on stages: Stage-2, Stage-0
- Stage-3 depends on stages: Stage-9
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -22,6 +17,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -74,19 +70,10 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.tmp_src
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
- Stage: Stage-9
+ Stage: Stage-4
Create Table Operator:
Create Table
columns: value string, cnt bigint
@@ -103,40 +90,6 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmp_src
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmp_src
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: CREATE TABLE tmp_src AS SELECT * FROM (SELECT value, count(value) AS cnt FROM src GROUP BY value) f1 ORDER BY cnt
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
@@ -464,9 +417,11 @@ val_348 5
PREHOOK: query: -- dyn partitions
CREATE TABLE tmp_src_part (c string) PARTITIONED BY (d int)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- dyn partitions
CREATE TABLE tmp_src_part (c string) PARTITIONED BY (d int)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@tmp_src_part
PREHOOK: query: EXPLAIN INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src
PREHOOK: type: QUERY
@@ -474,18 +429,14 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * F
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -505,15 +456,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.tmp_src_part
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-2
Dependency Collection
@@ -532,40 +474,6 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmp_src_part
-
- Stage: Stage-6
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmp_src_part
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
PREHOOK: query: INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src
PREHOOK: type: QUERY
PREHOOK: Input: default@tmp_src
@@ -918,9 +826,11 @@ val_348 5
PREHOOK: query: -- multi insert
CREATE TABLE even (c int, d string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: -- multi insert
CREATE TABLE even (c int, d string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@even
POSTHOOK: Lineage: tmp_src_part PARTITION(d=1).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: tmp_src_part PARTITION(d=2).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ]
@@ -929,8 +839,10 @@ POSTHOOK: Lineage: tmp_src_part PARTITION(d=4).c SIMPLE [(tmp_src)tmp_src.FieldS
POSTHOOK: Lineage: tmp_src_part PARTITION(d=5).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: CREATE TABLE odd (c int, d string)
PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE odd (c int, d string)
POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
POSTHOOK: Output: default@odd
POSTHOOK: Lineage: tmp_src_part PARTITION(d=1).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: tmp_src_part PARTITION(d=2).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ]
@@ -954,25 +866,16 @@ POSTHOOK: Lineage: tmp_src_part PARTITION(d=4).c SIMPLE [(tmp_src)tmp_src.FieldS
POSTHOOK: Lineage: tmp_src_part PARTITION(d=5).c SIMPLE [(tmp_src)tmp_src.FieldSchema(name:value, type:string, comment:null), ]
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
- Stage-6
- Stage-3 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+ Stage-3 depends on stages: Stage-2
Stage-0 depends on stages: Stage-3
Stage-4 depends on stages: Stage-0
Stage-1 depends on stages: Stage-3
- Stage-10 depends on stages: Stage-1
- Stage-5
- Stage-7
- Stage-8 depends on stages: Stage-7
- Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
- Stage-12
- Stage-11
- Stage-13
- Stage-14 depends on stages: Stage-13
+ Stage-5 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-2
Tez
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
@@ -1010,15 +913,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.odd
- Stage: Stage-9
- Conditional Operator
-
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-3
Dependency Collection
@@ -1045,85 +939,8 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.odd
- Stage: Stage-10
- Stats-Aggr Operator
-
Stage: Stage-5
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.even
-
- Stage: Stage-7
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.even
-
- Stage: Stage-8
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-15
- Conditional Operator
-
- Stage: Stage-12
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-11
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.odd
-
- Stage: Stage-13
- Tez
- Vertices:
- Merge
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.odd
-
- Stage: Stage-14
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
+ Stats-Aggr Operator
PREHOOK: query: FROM src
INSERT INTO TABLE even SELECT key, value WHERE key % 2 = 0
diff --git ql/src/test/results/clientpositive/tez/tez_join_tests.q.out ql/src/test/results/clientpositive/tez/tez_join_tests.q.out
index 0117860..63c8ec0 100644
--- ql/src/test/results/clientpositive/tez/tez_join_tests.q.out
+++ ql/src/test/results/clientpositive/tez/tez_join_tests.q.out
@@ -16,6 +16,7 @@ STAGE PLANS:
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out
index 6c0c121..6f00d74 100644
--- ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out
+++ ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out
@@ -16,6 +16,7 @@ STAGE PLANS:
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/union2.q.out ql/src/test/results/clientpositive/tez/union2.q.out
new file mode 100644
index 0000000..1b1b7d5
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union2.q.out
@@ -0,0 +1,91 @@
+PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink
+
+explain
+ select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2) unionsrc
+PREHOOK: type: QUERY
+POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink
+
+explain
+ select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2) unionsrc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 4 <- Union 2 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+1000
diff --git ql/src/test/results/clientpositive/tez/union3.q.out ql/src/test/results/clientpositive/tez/union3.q.out
new file mode 100644
index 0000000..c14fbd8
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union3.q.out
@@ -0,0 +1,276 @@
+PREHOOK: query: explain
+SELECT *
+FROM (
+ SELECT 1 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 2 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 3 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+ UNION ALL
+ SELECT 4 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT *
+FROM (
+ SELECT 1 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 2 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 3 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+ UNION ALL
+ SELECT 4 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 10 <- Map 9 (SIMPLE_EDGE)
+ Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reducer 10
+ Reduce Operator Tree:
+ Extract
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 2 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 11
+ Reduce Operator Tree:
+ Extract
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 2
+ Reduce Operator Tree:
+ Extract
+ Limit
+ Number of rows: 1
+ Select Operator
+ expressions: 4 (type: int)
+ outputColumnNames: _col0
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Reduce Operator Tree:
+ Extract
+ Limit
+ Number of rows: 1
+ Select Operator
+ expressions: 3 (type: int)
+ outputColumnNames: _col0
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Extract
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 8
+ Reduce Operator Tree:
+ Extract
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: CREATE TABLE union_out (id int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE union_out (id int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_out
+PREHOOK: query: insert overwrite table union_out
+SELECT *
+FROM (
+ SELECT 1 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 2 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 3 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+ UNION ALL
+ SELECT 4 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@union_out
+POSTHOOK: query: insert overwrite table union_out
+SELECT *
+FROM (
+ SELECT 1 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 2 AS id
+ FROM (SELECT * FROM src LIMIT 1) s1
+ CLUSTER BY id
+ UNION ALL
+ SELECT 3 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+ UNION ALL
+ SELECT 4 AS id
+ FROM (SELECT * FROM src LIMIT 1) s2
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@union_out
+POSTHOOK: Lineage: union_out.id EXPRESSION []
+PREHOOK: query: select * from union_out cluster by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_out
+#### A masked pattern was here ####
+POSTHOOK: query: select * from union_out cluster by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_out
+#### A masked pattern was here ####
+POSTHOOK: Lineage: union_out.id EXPRESSION []
+1
+2
+3
+4
diff --git ql/src/test/results/clientpositive/tez/union4.q.out ql/src/test/results/clientpositive/tez/union4.q.out
new file mode 100644
index 0000000..9e74952
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union4.q.out
@@ -0,0 +1,156 @@
+PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by filesink
+
+
+create table tmptable(key string, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by filesink
+
+
+create table tmptable(key string, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmptable
+PREHOOK: query: explain
+insert overwrite table tmptable
+ select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table tmptable
+ select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: 'tst1' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: 'tst2' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table tmptable
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tmptable
+POSTHOOK: query: insert overwrite table tmptable
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tmptable
+POSTHOOK: Lineage: tmptable.key EXPRESSION []
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, ]
+PREHOOK: query: select * from tmptable x sort by x.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmptable
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmptable x sort by x.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmptable
+#### A masked pattern was here ####
+POSTHOOK: Lineage: tmptable.key EXPRESSION []
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src)s2.null, ]
+tst1 500
+tst2 500
diff --git ql/src/test/results/clientpositive/tez/union5.q.out ql/src/test/results/clientpositive/tez/union5.q.out
new file mode 100644
index 0000000..efc4552
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union5.q.out
@@ -0,0 +1,143 @@
+PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink
+
+explain
+ select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink
+
+explain
+ select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+ Reducer 4 <- Union 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: 'tst1' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: 'tst2' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: bigint)
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+ UNION ALL
+ select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1 1
+tst2 1
diff --git ql/src/test/results/clientpositive/tez/union6.q.out ql/src/test/results/clientpositive/tez/union6.q.out
new file mode 100644
index 0000000..c945105
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union6.q.out
@@ -0,0 +1,164 @@
+PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink
+
+
+create table tmptable(key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink
+
+
+create table tmptable(key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmptable
+PREHOOK: query: explain
+insert overwrite table tmptable
+ select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table tmptable
+ select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 4 <- Union 3 (CONTAINS)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: 'tst1' (type: string), UDFToString(_col0) (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table tmptable
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@tmptable
+POSTHOOK: query: insert overwrite table tmptable
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@tmptable
+POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select * from tmptable x sort by x.key, x.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmptable
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmptable x sort by x.key, x.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmptable
+#### A masked pattern was here ####
+POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
+
+
+
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
+tst1 500
diff --git ql/src/test/results/clientpositive/tez/union7.q.out ql/src/test/results/clientpositive/tez/union7.q.out
new file mode 100644
index 0000000..58cbcd5
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union7.q.out
@@ -0,0 +1,142 @@
+PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink
+
+explain
+ select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink
+
+explain
+ select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 5 <- Union 3 (CONTAINS)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+ Reducer 4 <- Union 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: 'tst1' (type: string), UDFToString(_col0) (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION ALL
+ select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 10
+128 1
+146 1
+150 1
+213 1
+224 1
+238 1
+255 1
+273 1
+278 1
+311 1
+369 1
+401 1
+406 1
+66 1
+98 1
+tst1 1
diff --git ql/src/test/results/clientpositive/tez/union8.q.out ql/src/test/results/clientpositive/tez/union8.q.out
new file mode 100644
index 0000000..cf48953
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union8.q.out
@@ -0,0 +1,1594 @@
+PREHOOK: query: -- union case: all subqueries are map-only jobs, 3 way union, same input for all sub-queries, followed by filesink
+
+explain
+ select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+PREHOOK: type: QUERY
+POSTHOOK: query: -- union case: all subqueries are map-only jobs, 3 way union, same input for all sub-queries, followed by filesink
+
+explain
+ select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+ Map 4 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
+265 val_265
+193 val_193
+401 val_401
+150 val_150
+273 val_273
+224 val_224
+369 val_369
+66 val_66
+128 val_128
+213 val_213
+146 val_146
+406 val_406
+429 val_429
+374 val_374
+152 val_152
+469 val_469
+145 val_145
+495 val_495
+37 val_37
+327 val_327
+281 val_281
+277 val_277
+209 val_209
+15 val_15
+82 val_82
+403 val_403
+166 val_166
+417 val_417
+430 val_430
+252 val_252
+292 val_292
+219 val_219
+287 val_287
+153 val_153
+193 val_193
+338 val_338
+446 val_446
+459 val_459
+394 val_394
+237 val_237
+482 val_482
+174 val_174
+413 val_413
+494 val_494
+207 val_207
+199 val_199
+466 val_466
+208 val_208
+174 val_174
+399 val_399
+396 val_396
+247 val_247
+417 val_417
+489 val_489
+162 val_162
+377 val_377
+397 val_397
+309 val_309
+365 val_365
+266 val_266
+439 val_439
+342 val_342
+367 val_367
+325 val_325
+167 val_167
+195 val_195
+475 val_475
+17 val_17
+113 val_113
+155 val_155
+203 val_203
+339 val_339
+0 val_0
+455 val_455
+128 val_128
+311 val_311
+316 val_316
+57 val_57
+302 val_302
+205 val_205
+149 val_149
+438 val_438
+345 val_345
+129 val_129
+170 val_170
+20 val_20
+489 val_489
+157 val_157
+378 val_378
+221 val_221
+92 val_92
+111 val_111
+47 val_47
+72 val_72
+4 val_4
+280 val_280
+35 val_35
+427 val_427
+277 val_277
+208 val_208
+356 val_356
+399 val_399
+169 val_169
+382 val_382
+498 val_498
+125 val_125
+386 val_386
+437 val_437
+469 val_469
+192 val_192
+286 val_286
+187 val_187
+176 val_176
+54 val_54
+459 val_459
+51 val_51
+138 val_138
+103 val_103
+239 val_239
+213 val_213
+216 val_216
+430 val_430
+278 val_278
+176 val_176
+289 val_289
+221 val_221
+65 val_65
+318 val_318
+332 val_332
+311 val_311
+275 val_275
+137 val_137
+241 val_241
+83 val_83
+333 val_333
+180 val_180
+284 val_284
+12 val_12
+230 val_230
+181 val_181
+67 val_67
+260 val_260
+404 val_404
+384 val_384
+489 val_489
+353 val_353
+373 val_373
+272 val_272
+138 val_138
+217 val_217
+84 val_84
+348 val_348
+466 val_466
+58 val_58
+8 val_8
+411 val_411
+230 val_230
+208 val_208
+348 val_348
+24 val_24
+463 val_463
+431 val_431
+179 val_179
+172 val_172
+42 val_42
+129 val_129
+158 val_158
+119 val_119
+496 val_496
+0 val_0
+322 val_322
+197 val_197
+468 val_468
+393 val_393
+454 val_454
+100 val_100
+298 val_298
+199 val_199
+191 val_191
+418 val_418
+96 val_96
+26 val_26
+165 val_165
+327 val_327
+230 val_230
+205 val_205
+120 val_120
+131 val_131
+51 val_51
+404 val_404
+43 val_43
+436 val_436
+156 val_156
+469 val_469
+468 val_468
+308 val_308
+95 val_95
+196 val_196
+288 val_288
+481 val_481
+457 val_457
+98 val_98
+282 val_282
+197 val_197
+187 val_187
+318 val_318
+318 val_318
+409 val_409
+470 val_470
+137 val_137
+369 val_369
+316 val_316
+169 val_169
+413 val_413
+85 val_85
+77 val_77
+0 val_0
+490 val_490
+87 val_87
+364 val_364
+179 val_179
+118 val_118
+134 val_134
+395 val_395
+282 val_282
+138 val_138
+238 val_238
+419 val_419
+15 val_15
+118 val_118
+72 val_72
+90 val_90
+307 val_307
+19 val_19
+435 val_435
+10 val_10
+277 val_277
+273 val_273
+306 val_306
+224 val_224
+309 val_309
+389 val_389
+327 val_327
+242 val_242
+369 val_369
+392 val_392
+272 val_272
+331 val_331
+401 val_401
+242 val_242
+452 val_452
+177 val_177
+226 val_226
+5 val_5
+497 val_497
+402 val_402
+396 val_396
+317 val_317
+395 val_395
+58 val_58
+35 val_35
+336 val_336
+95 val_95
+11 val_11
+168 val_168
+34 val_34
+229 val_229
+233 val_233
+143 val_143
+472 val_472
+322 val_322
+498 val_498
+160 val_160
+195 val_195
+42 val_42
+321 val_321
+430 val_430
+119 val_119
+489 val_489
+458 val_458
+78 val_78
+76 val_76
+41 val_41
+223 val_223
+492 val_492
+149 val_149
+449 val_449
+218 val_218
+228 val_228
+138 val_138
+453 val_453
+30 val_30
+209 val_209
+64 val_64
+468 val_468
+76 val_76
+74 val_74
+342 val_342
+69 val_69
+230 val_230
+33 val_33
+368 val_368
+103 val_103
+296 val_296
+113 val_113
+216 val_216
+367 val_367
+344 val_344
+167 val_167
+274 val_274
+219 val_219
+239 val_239
+485 val_485
+116 val_116
+223 val_223
+256 val_256
+263 val_263
+70 val_70
+487 val_487
+480 val_480
+401 val_401
+288 val_288
+191 val_191
+5 val_5
+244 val_244
+438 val_438
+128 val_128
+467 val_467
+432 val_432
+202 val_202
+316 val_316
+229 val_229
+469 val_469
+463 val_463
+280 val_280
+2 val_2
+35 val_35
+283 val_283
+331 val_331
+235 val_235
+80 val_80
+44 val_44
+193 val_193
+321 val_321
+335 val_335
+104 val_104
+466 val_466
+366 val_366
+175 val_175
+403 val_403
+483 val_483
+53 val_53
+105 val_105
+257 val_257
+406 val_406
+409 val_409
+190 val_190
+406 val_406
+401 val_401
+114 val_114
+258 val_258
+90 val_90
+203 val_203
+262 val_262
+348 val_348
+424 val_424
+12 val_12
+396 val_396
+201 val_201
+217 val_217
+164 val_164
+431 val_431
+454 val_454
+478 val_478
+298 val_298
+125 val_125
+431 val_431
+164 val_164
+424 val_424
+187 val_187
+382 val_382
+5 val_5
+70 val_70
+397 val_397
+480 val_480
+291 val_291
+24 val_24
+351 val_351
+255 val_255
+104 val_104
+70 val_70
+163 val_163
+438 val_438
+119 val_119
+414 val_414
+200 val_200
+491 val_491
+237 val_237
+439 val_439
+360 val_360
+248 val_248
+479 val_479
+305 val_305
+417 val_417
+199 val_199
+444 val_444
+120 val_120
+429 val_429
+169 val_169
+443 val_443
+323 val_323
+325 val_325
+277 val_277
+230 val_230
+478 val_478
+178 val_178
+468 val_468
+310 val_310
+317 val_317
+333 val_333
+493 val_493
+460 val_460
+207 val_207
+249 val_249
+265 val_265
+480 val_480
+83 val_83
+136 val_136
+353 val_353
+172 val_172
+214 val_214
+462 val_462
+233 val_233
+406 val_406
+133 val_133
+175 val_175
+189 val_189
+454 val_454
+375 val_375
+401 val_401
+421 val_421
+407 val_407
+384 val_384
+256 val_256
+26 val_26
+134 val_134
+67 val_67
+384 val_384
+379 val_379
+18 val_18
+462 val_462
+492 val_492
+100 val_100
+298 val_298
+9 val_9
+341 val_341
+498 val_498
+146 val_146
+458 val_458
+362 val_362
+186 val_186
+285 val_285
+348 val_348
+167 val_167
+18 val_18
+273 val_273
+183 val_183
+281 val_281
+344 val_344
+97 val_97
+469 val_469
+315 val_315
+84 val_84
+28 val_28
+37 val_37
+448 val_448
+152 val_152
+348 val_348
+307 val_307
+194 val_194
+414 val_414
+477 val_477
+222 val_222
+126 val_126
+90 val_90
+169 val_169
+403 val_403
+400 val_400
+200 val_200
+97 val_97
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
+265 val_265
+193 val_193
+401 val_401
+150 val_150
+273 val_273
+224 val_224
+369 val_369
+66 val_66
+128 val_128
+213 val_213
+146 val_146
+406 val_406
+429 val_429
+374 val_374
+152 val_152
+469 val_469
+145 val_145
+495 val_495
+37 val_37
+327 val_327
+281 val_281
+277 val_277
+209 val_209
+15 val_15
+82 val_82
+403 val_403
+166 val_166
+417 val_417
+430 val_430
+252 val_252
+292 val_292
+219 val_219
+287 val_287
+153 val_153
+193 val_193
+338 val_338
+446 val_446
+459 val_459
+394 val_394
+237 val_237
+482 val_482
+174 val_174
+413 val_413
+494 val_494
+207 val_207
+199 val_199
+466 val_466
+208 val_208
+174 val_174
+399 val_399
+396 val_396
+247 val_247
+417 val_417
+489 val_489
+162 val_162
+377 val_377
+397 val_397
+309 val_309
+365 val_365
+266 val_266
+439 val_439
+342 val_342
+367 val_367
+325 val_325
+167 val_167
+195 val_195
+475 val_475
+17 val_17
+113 val_113
+155 val_155
+203 val_203
+339 val_339
+0 val_0
+455 val_455
+128 val_128
+311 val_311
+316 val_316
+57 val_57
+302 val_302
+205 val_205
+149 val_149
+438 val_438
+345 val_345
+129 val_129
+170 val_170
+20 val_20
+489 val_489
+157 val_157
+378 val_378
+221 val_221
+92 val_92
+111 val_111
+47 val_47
+72 val_72
+4 val_4
+280 val_280
+35 val_35
+427 val_427
+277 val_277
+208 val_208
+356 val_356
+399 val_399
+169 val_169
+382 val_382
+498 val_498
+125 val_125
+386 val_386
+437 val_437
+469 val_469
+192 val_192
+286 val_286
+187 val_187
+176 val_176
+54 val_54
+459 val_459
+51 val_51
+138 val_138
+103 val_103
+239 val_239
+213 val_213
+216 val_216
+430 val_430
+278 val_278
+176 val_176
+289 val_289
+221 val_221
+65 val_65
+318 val_318
+332 val_332
+311 val_311
+275 val_275
+137 val_137
+241 val_241
+83 val_83
+333 val_333
+180 val_180
+284 val_284
+12 val_12
+230 val_230
+181 val_181
+67 val_67
+260 val_260
+404 val_404
+384 val_384
+489 val_489
+353 val_353
+373 val_373
+272 val_272
+138 val_138
+217 val_217
+84 val_84
+348 val_348
+466 val_466
+58 val_58
+8 val_8
+411 val_411
+230 val_230
+208 val_208
+348 val_348
+24 val_24
+463 val_463
+431 val_431
+179 val_179
+172 val_172
+42 val_42
+129 val_129
+158 val_158
+119 val_119
+496 val_496
+0 val_0
+322 val_322
+197 val_197
+468 val_468
+393 val_393
+454 val_454
+100 val_100
+298 val_298
+199 val_199
+191 val_191
+418 val_418
+96 val_96
+26 val_26
+165 val_165
+327 val_327
+230 val_230
+205 val_205
+120 val_120
+131 val_131
+51 val_51
+404 val_404
+43 val_43
+436 val_436
+156 val_156
+469 val_469
+468 val_468
+308 val_308
+95 val_95
+196 val_196
+288 val_288
+481 val_481
+457 val_457
+98 val_98
+282 val_282
+197 val_197
+187 val_187
+318 val_318
+318 val_318
+409 val_409
+470 val_470
+137 val_137
+369 val_369
+316 val_316
+169 val_169
+413 val_413
+85 val_85
+77 val_77
+0 val_0
+490 val_490
+87 val_87
+364 val_364
+179 val_179
+118 val_118
+134 val_134
+395 val_395
+282 val_282
+138 val_138
+238 val_238
+419 val_419
+15 val_15
+118 val_118
+72 val_72
+90 val_90
+307 val_307
+19 val_19
+435 val_435
+10 val_10
+277 val_277
+273 val_273
+306 val_306
+224 val_224
+309 val_309
+389 val_389
+327 val_327
+242 val_242
+369 val_369
+392 val_392
+272 val_272
+331 val_331
+401 val_401
+242 val_242
+452 val_452
+177 val_177
+226 val_226
+5 val_5
+497 val_497
+402 val_402
+396 val_396
+317 val_317
+395 val_395
+58 val_58
+35 val_35
+336 val_336
+95 val_95
+11 val_11
+168 val_168
+34 val_34
+229 val_229
+233 val_233
+143 val_143
+472 val_472
+322 val_322
+498 val_498
+160 val_160
+195 val_195
+42 val_42
+321 val_321
+430 val_430
+119 val_119
+489 val_489
+458 val_458
+78 val_78
+76 val_76
+41 val_41
+223 val_223
+492 val_492
+149 val_149
+449 val_449
+218 val_218
+228 val_228
+138 val_138
+453 val_453
+30 val_30
+209 val_209
+64 val_64
+468 val_468
+76 val_76
+74 val_74
+342 val_342
+69 val_69
+230 val_230
+33 val_33
+368 val_368
+103 val_103
+296 val_296
+113 val_113
+216 val_216
+367 val_367
+344 val_344
+167 val_167
+274 val_274
+219 val_219
+239 val_239
+485 val_485
+116 val_116
+223 val_223
+256 val_256
+263 val_263
+70 val_70
+487 val_487
+480 val_480
+401 val_401
+288 val_288
+191 val_191
+5 val_5
+244 val_244
+438 val_438
+128 val_128
+467 val_467
+432 val_432
+202 val_202
+316 val_316
+229 val_229
+469 val_469
+463 val_463
+280 val_280
+2 val_2
+35 val_35
+283 val_283
+331 val_331
+235 val_235
+80 val_80
+44 val_44
+193 val_193
+321 val_321
+335 val_335
+104 val_104
+466 val_466
+366 val_366
+175 val_175
+403 val_403
+483 val_483
+53 val_53
+105 val_105
+257 val_257
+406 val_406
+409 val_409
+190 val_190
+406 val_406
+401 val_401
+114 val_114
+258 val_258
+90 val_90
+203 val_203
+262 val_262
+348 val_348
+424 val_424
+12 val_12
+396 val_396
+201 val_201
+217 val_217
+164 val_164
+431 val_431
+454 val_454
+478 val_478
+298 val_298
+125 val_125
+431 val_431
+164 val_164
+424 val_424
+187 val_187
+382 val_382
+5 val_5
+70 val_70
+397 val_397
+480 val_480
+291 val_291
+24 val_24
+351 val_351
+255 val_255
+104 val_104
+70 val_70
+163 val_163
+438 val_438
+119 val_119
+414 val_414
+200 val_200
+491 val_491
+237 val_237
+439 val_439
+360 val_360
+248 val_248
+479 val_479
+305 val_305
+417 val_417
+199 val_199
+444 val_444
+120 val_120
+429 val_429
+169 val_169
+443 val_443
+323 val_323
+325 val_325
+277 val_277
+230 val_230
+478 val_478
+178 val_178
+468 val_468
+310 val_310
+317 val_317
+333 val_333
+493 val_493
+460 val_460
+207 val_207
+249 val_249
+265 val_265
+480 val_480
+83 val_83
+136 val_136
+353 val_353
+172 val_172
+214 val_214
+462 val_462
+233 val_233
+406 val_406
+133 val_133
+175 val_175
+189 val_189
+454 val_454
+375 val_375
+401 val_401
+421 val_421
+407 val_407
+384 val_384
+256 val_256
+26 val_26
+134 val_134
+67 val_67
+384 val_384
+379 val_379
+18 val_18
+462 val_462
+492 val_492
+100 val_100
+298 val_298
+9 val_9
+341 val_341
+498 val_498
+146 val_146
+458 val_458
+362 val_362
+186 val_186
+285 val_285
+348 val_348
+167 val_167
+18 val_18
+273 val_273
+183 val_183
+281 val_281
+344 val_344
+97 val_97
+469 val_469
+315 val_315
+84 val_84
+28 val_28
+37 val_37
+448 val_448
+152 val_152
+348 val_348
+307 val_307
+194 val_194
+414 val_414
+477 val_477
+222 val_222
+126 val_126
+90 val_90
+169 val_169
+403 val_403
+400 val_400
+200 val_200
+97 val_97
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
+265 val_265
+193 val_193
+401 val_401
+150 val_150
+273 val_273
+224 val_224
+369 val_369
+66 val_66
+128 val_128
+213 val_213
+146 val_146
+406 val_406
+429 val_429
+374 val_374
+152 val_152
+469 val_469
+145 val_145
+495 val_495
+37 val_37
+327 val_327
+281 val_281
+277 val_277
+209 val_209
+15 val_15
+82 val_82
+403 val_403
+166 val_166
+417 val_417
+430 val_430
+252 val_252
+292 val_292
+219 val_219
+287 val_287
+153 val_153
+193 val_193
+338 val_338
+446 val_446
+459 val_459
+394 val_394
+237 val_237
+482 val_482
+174 val_174
+413 val_413
+494 val_494
+207 val_207
+199 val_199
+466 val_466
+208 val_208
+174 val_174
+399 val_399
+396 val_396
+247 val_247
+417 val_417
+489 val_489
+162 val_162
+377 val_377
+397 val_397
+309 val_309
+365 val_365
+266 val_266
+439 val_439
+342 val_342
+367 val_367
+325 val_325
+167 val_167
+195 val_195
+475 val_475
+17 val_17
+113 val_113
+155 val_155
+203 val_203
+339 val_339
+0 val_0
+455 val_455
+128 val_128
+311 val_311
+316 val_316
+57 val_57
+302 val_302
+205 val_205
+149 val_149
+438 val_438
+345 val_345
+129 val_129
+170 val_170
+20 val_20
+489 val_489
+157 val_157
+378 val_378
+221 val_221
+92 val_92
+111 val_111
+47 val_47
+72 val_72
+4 val_4
+280 val_280
+35 val_35
+427 val_427
+277 val_277
+208 val_208
+356 val_356
+399 val_399
+169 val_169
+382 val_382
+498 val_498
+125 val_125
+386 val_386
+437 val_437
+469 val_469
+192 val_192
+286 val_286
+187 val_187
+176 val_176
+54 val_54
+459 val_459
+51 val_51
+138 val_138
+103 val_103
+239 val_239
+213 val_213
+216 val_216
+430 val_430
+278 val_278
+176 val_176
+289 val_289
+221 val_221
+65 val_65
+318 val_318
+332 val_332
+311 val_311
+275 val_275
+137 val_137
+241 val_241
+83 val_83
+333 val_333
+180 val_180
+284 val_284
+12 val_12
+230 val_230
+181 val_181
+67 val_67
+260 val_260
+404 val_404
+384 val_384
+489 val_489
+353 val_353
+373 val_373
+272 val_272
+138 val_138
+217 val_217
+84 val_84
+348 val_348
+466 val_466
+58 val_58
+8 val_8
+411 val_411
+230 val_230
+208 val_208
+348 val_348
+24 val_24
+463 val_463
+431 val_431
+179 val_179
+172 val_172
+42 val_42
+129 val_129
+158 val_158
+119 val_119
+496 val_496
+0 val_0
+322 val_322
+197 val_197
+468 val_468
+393 val_393
+454 val_454
+100 val_100
+298 val_298
+199 val_199
+191 val_191
+418 val_418
+96 val_96
+26 val_26
+165 val_165
+327 val_327
+230 val_230
+205 val_205
+120 val_120
+131 val_131
+51 val_51
+404 val_404
+43 val_43
+436 val_436
+156 val_156
+469 val_469
+468 val_468
+308 val_308
+95 val_95
+196 val_196
+288 val_288
+481 val_481
+457 val_457
+98 val_98
+282 val_282
+197 val_197
+187 val_187
+318 val_318
+318 val_318
+409 val_409
+470 val_470
+137 val_137
+369 val_369
+316 val_316
+169 val_169
+413 val_413
+85 val_85
+77 val_77
+0 val_0
+490 val_490
+87 val_87
+364 val_364
+179 val_179
+118 val_118
+134 val_134
+395 val_395
+282 val_282
+138 val_138
+238 val_238
+419 val_419
+15 val_15
+118 val_118
+72 val_72
+90 val_90
+307 val_307
+19 val_19
+435 val_435
+10 val_10
+277 val_277
+273 val_273
+306 val_306
+224 val_224
+309 val_309
+389 val_389
+327 val_327
+242 val_242
+369 val_369
+392 val_392
+272 val_272
+331 val_331
+401 val_401
+242 val_242
+452 val_452
+177 val_177
+226 val_226
+5 val_5
+497 val_497
+402 val_402
+396 val_396
+317 val_317
+395 val_395
+58 val_58
+35 val_35
+336 val_336
+95 val_95
+11 val_11
+168 val_168
+34 val_34
+229 val_229
+233 val_233
+143 val_143
+472 val_472
+322 val_322
+498 val_498
+160 val_160
+195 val_195
+42 val_42
+321 val_321
+430 val_430
+119 val_119
+489 val_489
+458 val_458
+78 val_78
+76 val_76
+41 val_41
+223 val_223
+492 val_492
+149 val_149
+449 val_449
+218 val_218
+228 val_228
+138 val_138
+453 val_453
+30 val_30
+209 val_209
+64 val_64
+468 val_468
+76 val_76
+74 val_74
+342 val_342
+69 val_69
+230 val_230
+33 val_33
+368 val_368
+103 val_103
+296 val_296
+113 val_113
+216 val_216
+367 val_367
+344 val_344
+167 val_167
+274 val_274
+219 val_219
+239 val_239
+485 val_485
+116 val_116
+223 val_223
+256 val_256
+263 val_263
+70 val_70
+487 val_487
+480 val_480
+401 val_401
+288 val_288
+191 val_191
+5 val_5
+244 val_244
+438 val_438
+128 val_128
+467 val_467
+432 val_432
+202 val_202
+316 val_316
+229 val_229
+469 val_469
+463 val_463
+280 val_280
+2 val_2
+35 val_35
+283 val_283
+331 val_331
+235 val_235
+80 val_80
+44 val_44
+193 val_193
+321 val_321
+335 val_335
+104 val_104
+466 val_466
+366 val_366
+175 val_175
+403 val_403
+483 val_483
+53 val_53
+105 val_105
+257 val_257
+406 val_406
+409 val_409
+190 val_190
+406 val_406
+401 val_401
+114 val_114
+258 val_258
+90 val_90
+203 val_203
+262 val_262
+348 val_348
+424 val_424
+12 val_12
+396 val_396
+201 val_201
+217 val_217
+164 val_164
+431 val_431
+454 val_454
+478 val_478
+298 val_298
+125 val_125
+431 val_431
+164 val_164
+424 val_424
+187 val_187
+382 val_382
+5 val_5
+70 val_70
+397 val_397
+480 val_480
+291 val_291
+24 val_24
+351 val_351
+255 val_255
+104 val_104
+70 val_70
+163 val_163
+438 val_438
+119 val_119
+414 val_414
+200 val_200
+491 val_491
+237 val_237
+439 val_439
+360 val_360
+248 val_248
+479 val_479
+305 val_305
+417 val_417
+199 val_199
+444 val_444
+120 val_120
+429 val_429
+169 val_169
+443 val_443
+323 val_323
+325 val_325
+277 val_277
+230 val_230
+478 val_478
+178 val_178
+468 val_468
+310 val_310
+317 val_317
+333 val_333
+493 val_493
+460 val_460
+207 val_207
+249 val_249
+265 val_265
+480 val_480
+83 val_83
+136 val_136
+353 val_353
+172 val_172
+214 val_214
+462 val_462
+233 val_233
+406 val_406
+133 val_133
+175 val_175
+189 val_189
+454 val_454
+375 val_375
+401 val_401
+421 val_421
+407 val_407
+384 val_384
+256 val_256
+26 val_26
+134 val_134
+67 val_67
+384 val_384
+379 val_379
+18 val_18
+462 val_462
+492 val_492
+100 val_100
+298 val_298
+9 val_9
+341 val_341
+498 val_498
+146 val_146
+458 val_458
+362 val_362
+186 val_186
+285 val_285
+348 val_348
+167 val_167
+18 val_18
+273 val_273
+183 val_183
+281 val_281
+344 val_344
+97 val_97
+469 val_469
+315 val_315
+84 val_84
+28 val_28
+37 val_37
+448 val_448
+152 val_152
+348 val_348
+307 val_307
+194 val_194
+414 val_414
+477 val_477
+222 val_222
+126 val_126
+90 val_90
+169 val_169
+403 val_403
+400 val_400
+200 val_200
+97 val_97
diff --git ql/src/test/results/clientpositive/tez/union9.q.out ql/src/test/results/clientpositive/tez/union9.q.out
new file mode 100644
index 0000000..ad23db3
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/union9.q.out
@@ -0,0 +1,111 @@
+PREHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by reducesink
+
+explain
+ select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+PREHOOK: type: QUERY
+POSTHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by reducesink
+
+explain
+ select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 4 <- Union 2 (CONTAINS)
+ Map 5 <- Union 2 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL
+ select s2.key as key, s2.value as value from src s2 UNION ALL
+ select s3.key as key, s3.value as value from src s3) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+1500
diff --git shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
index 4f5d4fa..51c8051 100644
--- shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
+++ shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
@@ -42,6 +42,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -50,8 +51,10 @@
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
@@ -773,6 +776,19 @@ public FileSystem createProxyFileSystem(FileSystem fs, URI uri) {
ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed");
return ret;
}
+
+ @Override
+ public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException {
+ /* not supported */
+ return null;
+ }
+
+ @Override
+ public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) {
+ /* not supported */
+ return null;
+ }
+
@Override
public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) {
return context.getConfiguration();
diff --git shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
index be57716..e4e56b7 100644
--- shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
+++ shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
@@ -30,6 +30,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -37,7 +38,9 @@
import org.apache.hadoop.fs.ProxyFileSystem;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.JobTracker;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.ClusterStatus;
@@ -410,6 +413,18 @@ public FileSystem createProxyFileSystem(FileSystem fs, URI uri) {
ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed");
return ret;
}
+
+ @Override
+ public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException {
+ /* not supported */
+ return null;
+ }
+
+ @Override
+ public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) {
+ /* not supported */
+ return null;
+ }
@Override
public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) {
diff --git shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 3d778df..5df5ed5 100644
--- shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -27,11 +27,13 @@
import java.util.Map;
import java.util.HashMap;
import java.net.URI;
+import java.nio.ByteBuffer;
import java.io.FileNotFoundException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
@@ -41,7 +43,12 @@
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
+import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
@@ -71,6 +78,19 @@
HadoopShims.MiniDFSShim cluster = null;
+ final boolean zeroCopy;
+
+ public Hadoop23Shims() {
+ boolean zcr = false;
+ try {
+ Class.forName("org.apache.hadoop.fs.CacheFlag", false,
+ ShimLoader.class.getClassLoader());
+ zcr = true;
+ } catch (ClassNotFoundException ce) {
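+ /* CacheFlag is used here only as a marker: its presence is a proxy for the
+  * newer HDFS zero-copy read API. When the class is missing, zero-copy stays disabled. */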
+ }
+ this.zeroCopy = zcr;
+ }
+
@Override
public String getTaskAttemptLogUrl(JobConf conf,
String taskTrackerHttpAddress, String taskAttemptId)
@@ -556,6 +576,24 @@ public FileSystem createProxyFileSystem(FileSystem fs, URI uri) {
ret.put("MAPREDSETUPCLEANUPNEEDED", "mapreduce.job.committer.setup.cleanup.needed");
ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed");
return ret;
+ }
+
+ @Override
+ public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException {
+ if(zeroCopy) {
+ return ZeroCopyShims.getZeroCopyReader(in, pool);
+ }
+ /* not supported */
+ return null;
+ }
+
+ @Override
+ public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) {
+ if(zeroCopy) {
+ return ZeroCopyShims.getDirectDecompressor(codec);
+ }
+ /* not supported */
+ return null;
}
@Override
diff --git shims/0.23/src/main/java/org/apache/hadoop/hive/shims/ZeroCopyShims.java shims/0.23/src/main/java/org/apache/hadoop/hive/shims/ZeroCopyShims.java
new file mode 100644
index 0000000..8de08ad
--- /dev/null
+++ shims/0.23/src/main/java/org/apache/hadoop/hive/shims/ZeroCopyShims.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.shims;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.EnumSet;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.ReadOption;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
+import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
+import org.apache.hadoop.io.ByteBufferPool;
+import org.apache.hadoop.io.compress.DirectDecompressor;
+import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor;
+import org.apache.hadoop.io.compress.zlib.ZlibDecompressor.CompressionHeader;
+import org.apache.hadoop.io.compress.zlib.ZlibDecompressor.ZlibDirectDecompressor;
+
+import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim;
+import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
+
+class ZeroCopyShims {
+ private static final class ByteBufferPoolAdapter implements ByteBufferPool {
+ private ByteBufferPoolShim pool;
+
+ public ByteBufferPoolAdapter(ByteBufferPoolShim pool) {
+ this.pool = pool;
+ }
+
+ @Override
+ public final ByteBuffer getBuffer(boolean direct, int length) {
+ return this.pool.getBuffer(direct, length);
+ }
+
+ @Override
+ public final void putBuffer(ByteBuffer buffer) {
+ this.pool.putBuffer(buffer);
+ }
+ }
+
+ private static final class ZeroCopyAdapter implements ZeroCopyReaderShim {
+ private final FSDataInputStream in;
+ private final ByteBufferPoolAdapter pool;
+ private final static EnumSet<ReadOption> CHECK_SUM = EnumSet
+ .noneOf(ReadOption.class);
+ private final static EnumSet<ReadOption> NO_CHECK_SUM = EnumSet
+ .of(ReadOption.SKIP_CHECKSUMS);
+
+ public ZeroCopyAdapter(FSDataInputStream in, ByteBufferPoolShim poolshim) {
+ this.in = in;
+ if (poolshim != null) {
+ pool = new ByteBufferPoolAdapter(poolshim);
+ } else {
+ pool = null;
+ }
+ }
+
+ public final ByteBuffer readBuffer(int maxLength, boolean verifyChecksums)
+ throws IOException {
+ EnumSet<ReadOption> options = NO_CHECK_SUM;
+ if (verifyChecksums) {
+ options = CHECK_SUM;
+ }
+ return this.in.read(this.pool, maxLength, options);
+ }
+
+ public final void releaseBuffer(ByteBuffer buffer) {
+ this.in.releaseBuffer(buffer);
+ }
+ }
+
+ public static ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
+ ByteBufferPoolShim pool) throws IOException {
+ return new ZeroCopyAdapter(in, pool);
+ }
+
+ private static final class DirectDecompressorAdapter implements
+ DirectDecompressorShim {
+ private final DirectDecompressor decompressor;
+
+ public DirectDecompressorAdapter(DirectDecompressor decompressor) {
+ this.decompressor = decompressor;
+ }
+
+ public void decompress(ByteBuffer src, ByteBuffer dst) throws IOException {
+ this.decompressor.decompress(src, dst);
+ }
+ }
+
+ public static DirectDecompressorShim getDirectDecompressor(
+ DirectCompressionType codec) {
+ DirectDecompressor decompressor = null;
+ switch (codec) {
+ case ZLIB: {
+ decompressor = new ZlibDirectDecompressor();
+ }
+ break;
+ case ZLIB_NOHEADER: {
+ decompressor = new ZlibDirectDecompressor(CompressionHeader.NO_HEADER, 0);
+ }
+ break;
+ case SNAPPY: {
+ decompressor = new SnappyDirectDecompressor();
+ }
+ break;
+ }
+ if (decompressor != null) {
+ return new DirectDecompressorAdapter(decompressor);
+ }
+ /* not supported */
+ return null;
+ }
+}
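The shim methods above return null on Hadoop versions that lack the zero-copy API, so callers must handle the fallback path themselves. The sketch below is purely illustrative and not part of this patch (the ZeroCopyReadExample class, the trivial pool, and the 1 MB read size are assumptions); it exercises the ZeroCopyReaderShim contract through ShimLoader and the HadoopShims interface extended later in this patch.

    import java.nio.ByteBuffer;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.shims.HadoopShims;
    import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim;
    import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
    import org.apache.hadoop.hive.shims.ShimLoader;

    public class ZeroCopyReadExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(new Path(args[0]));

        // Trivial pool: hand out fresh buffers and let returned ones be garbage collected.
        ByteBufferPoolShim pool = new ByteBufferPoolShim() {
          public ByteBuffer getBuffer(boolean direct, int length) {
            return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);
          }
          public void putBuffer(ByteBuffer buffer) {
            // intentionally dropped; a real pool would cache the buffer
          }
        };

        HadoopShims shims = ShimLoader.getHadoopShims();
        ZeroCopyReaderShim zcr = shims.getZeroCopyReader(in, pool);
        if (zcr == null) {
          // 0.20/0.20S shims (or a 2.x without the API): fall back to ordinary reads.
          System.out.println("zero-copy reads not supported by this Hadoop version");
          in.close();
          return;
        }

        ByteBuffer buf = zcr.readBuffer(1 << 20, false); // up to 1 MB, checksums skipped
        try {
          System.out.println("read " + buf.remaining() + " bytes");
        } finally {
          zcr.releaseBuffer(buf); // return the buffer before closing the stream
          in.close();
        }
      }
    }

Because the 0.20-based shims return null unconditionally, a caller written against this contract degrades to regular stream reads without any version checks of its own.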
diff --git shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
index 9e9a60d..e15ab4e 100644
--- shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
+++ shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
@@ -24,6 +24,7 @@
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.util.Comparator;
import java.util.Iterator;
@@ -36,11 +37,13 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
@@ -521,6 +524,69 @@ RecordReader getRecordReader(JobConf job, InputSplitShim split, Reporter reporte
public Map<String, String> getHadoopConfNames();
+ /**
+ * A hadoop.io ByteBufferPool shim.
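+ * A trivial implementation may simply allocate a fresh buffer on every call
+ * and let returned buffers be garbage collected.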
+ */
+ public interface ByteBufferPoolShim {
+ /**
+ * Get a new ByteBuffer from the pool. The pool can provide this by
+ * removing a buffer from its internal cache, or by allocating a
+ * new one.
+ *
+ * @param direct Whether the buffer should be direct.
+ * @param length The minimum length the buffer will have.
+ * @return A new ByteBuffer. Its capacity can be less
+ * than what was requested, but must be at
+ * least 1 byte.
+ */
+ ByteBuffer getBuffer(boolean direct, int length);
+
+ /**
+ * Release a buffer back to the pool.
+ * The pool may choose to put this buffer into its cache or to free it.
+ *
+ * @param buffer a direct bytebuffer
+ */
+ void putBuffer(ByteBuffer buffer);
+ }
+
+ /**
+ * Provides an HDFS ZeroCopyReader shim.
+ * @param in the FSDataInputStream to read from (the cached/mmapped buffers are tied to this stream)
+ * @param pool ByteBufferPoolShim used to allocate fallback buffers
+ *
+ * @return a ZeroCopyReaderShim, or null if zero-copy reads are not supported
+ */
+ public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException;
+
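+ /**
+ * A shim over the HDFS zero-copy read API. Buffers returned by readBuffer()
+ * remain owned by the underlying stream and must be handed back via releaseBuffer().
+ */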
+ public interface ZeroCopyReaderShim {
+ /**
+ * Get a ByteBuffer from the FSDataInputStream - this can be either a HeapByteBuffer or a MappedByteBuffer.
+ * The stream position is advanced by the number of bytes read; the data returned can be smaller than maxLength.
+ *
+ * @return the ByteBuffer read from the stream
+ */
+ public ByteBuffer readBuffer(int maxLength, boolean verifyChecksums) throws IOException;
+ /**
+ * Release a ByteBuffer obtained from a previous readBuffer() call, once its
+ * contents have been consumed, so that the underlying resources can be reclaimed.
+ *
+ */
+ public void releaseBuffer(ByteBuffer buffer);
+ }
+
+ public enum DirectCompressionType {
+ NONE,
+ ZLIB_NOHEADER,
+ ZLIB,
+ SNAPPY,
+ };
+
+ public interface DirectDecompressorShim {
+ public void decompress(ByteBuffer src, ByteBuffer dst) throws IOException;
+ }
+
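+ /**
+ * Provides a direct (ByteBuffer-to-ByteBuffer) decompressor for the given codec.
+ *
+ * @return a DirectDecompressorShim, or null if direct decompression is not supported
+ */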
+ public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec);
/**
* Get configuration from JobContext