diff --git itests/qtest/testconfiguration.properties itests/qtest/testconfiguration.properties index 6731561..f724d0e 100644 --- itests/qtest/testconfiguration.properties +++ itests/qtest/testconfiguration.properties @@ -1,5 +1,5 @@ minimr.query.files=stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q minimr.query.negative.files=cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q minitez.query.files=tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q -minitez.query.files.shared=cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q 
+minitez.query.files.shared=cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q beeline.positive.exclude=add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,u
daf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
index 622ee45..8f51f3c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
@@ -22,6 +22,7 @@
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -35,6 +36,7 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -365,6 +367,10 @@ public T getWork() {
     return work;
   }
 
+  public Collection<MapWork> getMapWork() {
+    return Collections.emptyList();
+  }
+
   public void setId(String id) {
     this.id = id;
   }
@@ -389,7 +395,7 @@ public boolean hasReduce() {
     return false;
   }
 
-  public Operator<? extends OperatorDesc> getReducer() {
+  public Operator<? extends OperatorDesc> getReducer(MapWork work) {
     return null;
   }
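The two hooks added to Task above give physical optimizers one way to ask any task for its map-side plans and for the reducer feeding off a given MapWork, whether the task is MR (one MapWork) or Tez (possibly several). A minimal sketch of a consumer of that contract, for illustration only — the class name and the printout are invented here, only Task.getMapWork() and Task.getReducer(MapWork) come from the patch:

    import java.io.Serializable;

    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.exec.Task;
    import org.apache.hadoop.hive.ql.plan.MapWork;
    import org.apache.hadoop.hive.ql.plan.OperatorDesc;

    public class MapWorkWalkSketch {
      // Visit every MapWork a task exposes together with the reducer that
      // consumes it, if any; this mirrors the per-MapWork traversal the
      // reworked MetadataOnlyOptimizer below performs.
      public static void walk(Task<? extends Serializable> task) {
        for (MapWork mapWork : task.getMapWork()) {
          Operator<? extends OperatorDesc> reducer = task.getReducer(mapWork);
          System.out.println(mapWork.getName()
              + (reducer == null ? ": map only" : ": feeds a reducer"));
        }
      }
    }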
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 29d59a4..56ec1c3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -271,6 +271,10 @@ public static MapredWork getMapRedWork(Configuration conf) {
     return w;
   }
 
+  public static void cacheMapWork(Configuration conf, MapWork work, Path hiveScratchDir) {
+    cacheBaseWork(conf, MAP_PLAN_NAME, work, hiveScratchDir);
+  }
+
   public static void setMapWork(Configuration conf, MapWork work) {
     setBaseWork(conf, MAP_PLAN_NAME, work);
   }
@@ -287,6 +291,17 @@ public static ReduceWork getReduceWork(Configuration conf) {
     return (ReduceWork) getBaseWork(conf, REDUCE_PLAN_NAME);
   }
 
+  public static void cacheBaseWork(Configuration conf, String name, BaseWork work,
+      Path hiveScratchDir) {
+    try {
+      setPlanPath(conf, hiveScratchDir);
+      setBaseWork(conf, name, work);
+    } catch (IOException e) {
+      LOG.error("Failed to cache plan", e);
+      throw new RuntimeException(e);
+    }
+  }
+
   /**
    * Pushes work into the global work map
    */
@@ -2369,13 +2384,15 @@ public static long sumOfExcept(Map aliasToSize,
 
   public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx)
       throws Exception {
-    ContentSummary cs = ctx.getCS(dirPath);
-    if (cs != null) {
-      LOG.info("Content Summary " + dirPath + "length: " + cs.getLength() + " num files: "
-          + cs.getFileCount() + " num directories: " + cs.getDirectoryCount());
-      return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
-    } else {
-      LOG.info("Content Summary not cached for " + dirPath);
+    if (ctx != null) {
+      ContentSummary cs = ctx.getCS(dirPath);
+      if (cs != null) {
+        LOG.info("Content Summary " + dirPath + "length: " + cs.getLength() + " num files: "
+            + cs.getFileCount() + " num directories: " + cs.getDirectoryCount());
+        return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
+      } else {
+        LOG.info("Content Summary not cached for " + dirPath);
+      }
     }
     return isEmptyPath(job, dirPath);
   }
@@ -3003,7 +3020,13 @@ public static double getHighestSamplePercentage (MapWork work) {
    * so we don't want to depend on scratch dir and context.
    */
   public static List getInputPathsTez(JobConf job, MapWork work) throws Exception {
-    List<Path> paths = getInputPaths(job, work, null, null);
+    String lScratchDir = HiveConf.getVar(job, HiveConf.ConfVars.SCRATCHDIR);
+
+    // we usually don't want to create dummy files for tez, however the metadata only
+    // optimization relies on it.
+    List<Path> paths = getInputPaths(job, work, new Path(lScratchDir), null,
+        !work.isUseOneNullRowInputFormat());
+
     return paths;
   }
 
@@ -3021,8 +3044,8 @@ public static double getHighestSamplePercentage (MapWork work) {
    * @return List of paths to process for the given MapWork
    * @throws Exception
    */
-  public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScratchDir, Context ctx)
-      throws Exception {
+  public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScratchDir,
+      Context ctx, boolean skipDummy) throws Exception {
     int sequenceNumber = 0;
 
     Set pathsProcessed = new HashSet();
@@ -3047,7 +3070,7 @@ public static double getHighestSamplePercentage (MapWork work) {
         pathsProcessed.add(path);
 
         LOG.info("Adding input file " + path);
-        if (!HiveConf.getVar(job, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
+        if (!skipDummy
             && isEmptyPath(job, path, ctx)) {
           path = createDummyFileForEmptyPartition(path, job, work,
               hiveScratchDir, alias, sequenceNumber++);
@@ -3065,8 +3088,7 @@ public static double getHighestSamplePercentage (MapWork work) {
       // T2) x;
       // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2
      // rows)
-      if (path == null
-          && !HiveConf.getVar(job, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
+      if (path == null && !skipDummy) {
        path = createDummyFileForEmptyTable(job, work, hiveScratchDir,
            alias, sequenceNumber++);
        pathsToAdd.add(path);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 1095173..b4a41bc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -366,7 +366,7 @@ public int execute(DriverContext driverContext) {
       }
     }
     work.configureJobConf(job);
-    List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx);
+    List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
     Utilities.setInputPaths(job, inputPaths);
 
     Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());
@@ -789,6 +789,11 @@ public static String generateCmdLine(HiveConf hconf, Context ctx)
   }
 
   @Override
+  public Collection<MapWork> getMapWork() {
+    return Collections.singleton(getWork().getMapWork());
+  }
+
+  @Override
   public boolean isMapRedTask() {
     return true;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java
index a9869f7..543aa0a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java
@@ -40,6 +40,7 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -462,8 +463,11 @@ public static String isEligibleForLocalMode(HiveConf conf,
   }
 
   @Override
-  public Operator<? extends OperatorDesc> getReducer() {
-    return getWork().getReduceWork() == null ? null : getWork().getReduceWork().getReducer();
+  public Operator<? extends OperatorDesc> getReducer(MapWork mapWork) {
+    if (getWork().getMapWork() == mapWork) {
+      return getWork().getReduceWork() == null ? null : getWork().getReduceWork().getReducer();
+    }
+    return null;
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 2e9728a..ba42edd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -49,6 +49,7 @@
 import org.apache.hadoop.hive.ql.exec.mr.ExecReducer;
 import org.apache.hadoop.hive.ql.exec.tez.tools.TezMergedLogicalInput;
 import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl;
@@ -195,6 +196,10 @@ private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) {
       inpFormat = BucketizedHiveInputFormat.class.getName();
     }
 
+    if (mapWork.isUseOneNullRowInputFormat()) {
+      inpFormat = CombineHiveInputFormat.class.getName();
+    }
+
     conf.set("mapred.mapper.class", ExecMapper.class.getName());
     conf.set("mapred.input.format.class", inpFormat);
 
@@ -413,7 +418,7 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
     Path tezDir = getTezDir(mrScratchDir);
 
     // set up the operator plan
-    Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
+    Utilities.cacheMapWork(conf, mapWork, mrScratchDir);
 
     // create the directories FileSinkOperators need
     Utilities.createTmpDirs(conf, mapWork);
@@ -441,6 +446,7 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
         }
       }
     }
+
     if (vertexHasCustomInput) {
       useTezGroupedSplits = false;
       // grouping happens in execution phase. Setting the class to TezGroupedSplitsInputFormat
@@ -459,7 +465,8 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
       }
     }
 
-    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
+    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)
+        && !mapWork.isUseOneNullRowInputFormat()) {
       // if we're generating the splits in the AM, we just need to set
       // the correct plugin.
       amSplitGeneratorClass = HiveSplitGenerator.class;
@@ -470,6 +477,9 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
       numTasks = inputSplitInfo.getNumTasks();
     }
 
+    // set up the operator plan
+    Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
+
     byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf);
     map = new Vertex(mapWork.getName(),
         new ProcessorDescriptor(MapTezProcessor.class.getName()).
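With the DagUtils changes above, a MapWork flagged as one-null-row (metadata only) skips Tez AM split generation and is read through CombineHiveInputFormat, since its partitions have already been rewritten to OneNullRowInputFormat. The following reduced sketch restates that input-format decision outside DagUtils, for illustration only — the class and method names are invented; only the MapWork flags and the input-format classes come from the patch:

    import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
    import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
    import org.apache.hadoop.hive.ql.io.HiveInputFormat;
    import org.apache.hadoop.hive.ql.plan.MapWork;
    import org.apache.hadoop.mapred.JobConf;

    public class VertexInputFormatSketch {
      // Bucketized input overrides the default; a metadata-only (one-null-row)
      // MapWork then forces CombineHiveInputFormat so splits are generated on
      // the client rather than in the Tez AM.
      static String pickInputFormat(JobConf conf, MapWork mapWork) {
        String inputFormat = HiveInputFormat.class.getName();
        if (mapWork.isUseBucketizedHiveInputFormat()) {
          inputFormat = BucketizedHiveInputFormat.class.getName();
        }
        if (mapWork.isUseOneNullRowInputFormat()) {
          inputFormat = CombineHiveInputFormat.class.getName();
        }
        conf.set("mapred.input.format.class", inputFormat);
        return inputFormat;
      }
    }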
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 949bcfb..951e918 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.exec.tez;
 
+import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
@@ -36,6 +37,9 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.TezEdgeProperty;
 import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
 import org.apache.hadoop.hive.ql.plan.TezWork;
@@ -354,4 +358,42 @@ public StageType getType() {
   public String getName() {
     return "TEZ";
   }
+
+  @Override
+  public Collection<MapWork> getMapWork() {
+    List<MapWork> result = new LinkedList<MapWork>();
+    TezWork work = getWork();
+
+    // framework expects MapWork instances that have no physical parents (i.e.: union parent is
+    // fine, broadcast parent isn't)
+    for (BaseWork w: work.getAllWorkUnsorted()) {
+      if (w instanceof MapWork) {
+        List<BaseWork> parents = work.getParents(w);
+        boolean candidate = true;
+        for (BaseWork parent: parents) {
+          if (!(parent instanceof UnionWork)) {
+            candidate = false;
+          }
+        }
+        if (candidate) {
+          result.add((MapWork)w);
+        }
+      }
+    }
+    return result;
+  }
+
+  @Override
+  public Operator<? extends OperatorDesc> getReducer(MapWork mapWork) {
+    List<BaseWork> children = getWork().getChildren(mapWork);
+    if (children.size() != 1) {
+      return null;
+    }
+
+    if (!(children.get(0) instanceof ReduceWork)) {
+      return null;
+    }
+
+    return ((ReduceWork)children.get(0)).getReducer();
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
index 521e5ae..b4ffe3f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
@@ -230,6 +230,8 @@ private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc) {
   }
 
   private void processAlias(MapWork work, String alias) {
+    work.setUseOneNullRowInputFormat(true);
+
     // Change the alias partition desc
     PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias);
     changePartitionToMetadataOnly(aliasPartn);
@@ -257,18 +259,11 @@ private String encode(Map partSpec) {
     public Object dispatch(Node nd, Stack stack, Object... nodeOutputs)
         throws SemanticException {
       Task task = (Task) nd;
-
-      Collection<Operator<? extends OperatorDesc>> topOperators
-        = task.getTopOperators();
-      if (topOperators.size() == 0) {
-        return null;
-      }
-
-      LOG.info("Looking for table scans where optimization is applicable");
+      // create the context for walking operators
       ParseContext parseContext = physicalContext.getParseContext();
       WalkerCtx walkerCtx = new WalkerCtx();
-
+
       Map opRules = new LinkedHashMap();
       opRules.put(new RuleRegExp("R1",
           TableScanOperator.getOperatorName() + "%"),
@@ -277,41 +272,52 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs)
           GroupByOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"),
           new FileSinkProcessor());
 
-      // The dispatcher fires the processor corresponding to the closest
-      // matching rule and passes the context along
-      Dispatcher disp = new DefaultRuleDispatcher(null, opRules, walkerCtx);
-      GraphWalker ogw = new PreOrderWalker(disp);
-
-      // Create a list of topOp nodes
-      ArrayList<Node> topNodes = new ArrayList<Node>();
-      // Get the top Nodes for this map-reduce task
-      for (Operator<? extends OperatorDesc>
-          workOperator : topOperators) {
-        if (parseContext.getTopOps().values().contains(workOperator)) {
-          topNodes.add(workOperator);
+      for (MapWork mapWork: task.getMapWork()) {
+        LOG.debug("Looking at: "+mapWork.getName());
+        Collection<Operator<? extends OperatorDesc>> topOperators
+          = mapWork.getAliasToWork().values();
+        if (topOperators.size() == 0) {
+          LOG.debug("No top operators");
+          return null;
         }
-      }
-      if (task.getReducer() != null) {
-        topNodes.add(task.getReducer());
-      }
+        LOG.info("Looking for table scans where optimization is applicable");
 
-      ogw.startWalking(topNodes, null);
+        // The dispatcher fires the processor corresponding to the closest
+        // matching rule and passes the context along
+        Dispatcher disp = new DefaultRuleDispatcher(null, opRules, walkerCtx);
+        GraphWalker ogw = new PreOrderWalker(disp);
 
-      LOG.info(String.format("Found %d metadata only table scans",
-          walkerCtx.getMetadataOnlyTableScans().size()));
-      Iterator<TableScanOperator> iterator
-        = walkerCtx.getMetadataOnlyTableScans().iterator();
+        // Create a list of topOp nodes
+        ArrayList<Node> topNodes = new ArrayList<Node>();
+        // Get the top Nodes for this map-reduce task
+        for (Operator<? extends OperatorDesc>
+            workOperator : topOperators) {
+          if (parseContext.getTopOps().values().contains(workOperator)) {
+            topNodes.add(workOperator);
+          }
+        }
 
-      while (iterator.hasNext()) {
-        TableScanOperator tso = iterator.next();
-        ((TableScanDesc)tso.getConf()).setIsMetadataOnly(true);
-        MapWork work = ((MapredWork) task.getWork()).getMapWork();
-        String alias = getAliasForTableScanOperator(work, tso);
-        LOG.info("Metadata only table scan for " + alias);
-        processAlias(work, alias);
+        Operator<? extends OperatorDesc> reducer = task.getReducer(mapWork);
+        if (reducer != null) {
+          topNodes.add(reducer);
+        }
+
+        ogw.startWalking(topNodes, null);
+
+        LOG.info(String.format("Found %d metadata only table scans",
+            walkerCtx.getMetadataOnlyTableScans().size()));
+        Iterator<TableScanOperator> iterator
+          = walkerCtx.getMetadataOnlyTableScans().iterator();
+
+        while (iterator.hasNext()) {
+          TableScanOperator tso = iterator.next();
+          ((TableScanDesc)tso.getConf()).setIsMetadataOnly(true);
+          String alias = getAliasForTableScanOperator(mapWork, tso);
+          LOG.info("Metadata only table scan for " + alias);
+          processAlias(mapWork, alias);
+        }
       }
-
       return null;
     }
   }
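In the optimizer above, processAlias() flips the whole MapWork to one-null-row mode and rewrites each qualifying partition to a metadata-only PartitionDesc, so at read time every such partition yields exactly one null row (partition column values come from partition metadata, not from data files); that is what the OneNullRowInputFormat / NullStructSerDe entries in the new Tez golden files further below reflect. A rough stand-alone sketch of the one-row reader idea, for illustration only — this is not Hive's OneNullRowInputFormat, and the class name is invented:

    import java.io.IOException;

    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapred.RecordReader;

    // Returns a single (null, null) record and then reports end of input,
    // so downstream operators see one row per split without any file I/O.
    public class OneNullRowReaderSketch implements RecordReader<NullWritable, NullWritable> {
      private boolean rowReturned = false;

      @Override
      public boolean next(NullWritable key, NullWritable value) throws IOException {
        if (rowReturned) {
          return false;
        }
        rowReturned = true;
        return true;
      }

      @Override
      public NullWritable createKey() {
        return NullWritable.get();
      }

      @Override
      public NullWritable createValue() {
        return NullWritable.get();
      }

      @Override
      public long getPos() throws IOException {
        return rowReturned ? 1 : 0;
      }

      @Override
      public float getProgress() throws IOException {
        return rowReturned ? 1.0f : 0.0f;
      }

      @Override
      public void close() throws IOException {
      }
    }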
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index c09c5be..10c8d51 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -60,6 +60,7 @@
 import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
 import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
 import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
+import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
@@ -247,15 +248,28 @@ protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, Pa
     PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
         pCtx.getFetchTask());
 
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
+      physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping metadata only query optimization");
+    }
+
     if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
       physicalCtx = new CrossProductCheck().resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping cross product analysis");
     }
 
     if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
-      (new Vectorizer()).resolve(physicalCtx);
+      physicalCtx = new Vectorizer().resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping vectorization");
     }
+
     if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
-      (new StageIDsRearranger()).resolve(physicalCtx);
+      physicalCtx = new StageIDsRearranger().resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping stage id rearranger");
     }
     return;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
index 9945dea..1d96c5d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
@@ -114,6 +114,8 @@
   private boolean useBucketizedHiveInputFormat;
 
+  private boolean useOneNullRowInputFormat;
+
   private Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
   private Map<String, Map<String, Integer>> scratchColumnMap = null;
   private boolean vectorMode = false;
@@ -390,6 +392,7 @@ public String getInputformat() {
   public void setInputformat(String inputformat) {
     this.inputformat = inputformat;
   }
+
   public boolean isUseBucketizedHiveInputFormat() {
     return useBucketizedHiveInputFormat;
   }
@@ -398,6 +401,14 @@ public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat
     this.useBucketizedHiveInputFormat = useBucketizedHiveInputFormat;
   }
 
+  public void setUseOneNullRowInputFormat(boolean useOneNullRowInputFormat) {
+    this.useOneNullRowInputFormat = useOneNullRowInputFormat;
+  }
+
+  public boolean isUseOneNullRowInputFormat() {
+    return useOneNullRowInputFormat;
+  }
+
   public QBJoinTree getJoinTree() {
     return joinTree;
   }
@@ -534,4 +545,14 @@ public void setVectorMode(boolean vectorMode) {
     this.vectorMode = vectorMode;
   }
 
+  public void logPathToAliases() {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("LOGGING PATH TO ALIASES");
+      for (Map.Entry<String, ArrayList<String>> entry: pathToAliases.entrySet()) {
+        for (String a: entry.getValue()) {
+          LOG.debug("Path: " + entry.getKey() + ", Alias: " + a);
+        }
+      }
+    }
+  }
 }
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
index 41243fe..56b0a3d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
@@ -177,7 +177,7 @@ public void testCombine() throws Exception {
 
     QueryPlan plan = drv.getPlan();
     MapRedTask selectTask = (MapRedTask)plan.getRootTasks().get(0);
-    List<Path> inputPaths = Utilities.getInputPaths(newJob, selectTask.getWork().getMapWork(), emptyScratchDir, ctx);
+    List<Path> inputPaths = Utilities.getInputPaths(newJob, selectTask.getWork().getMapWork(), emptyScratchDir, ctx, false);
     Utilities.setInputPaths(newJob, inputPaths);
 
     Utilities.setMapRedWork(newJob, selectTask.getWork(), ctx.getMRTmpPath());
diff --git ql/src/test/results/clientpositive/tez/metadata_only_queries_with_filters.q.out
ql/src/test/results/clientpositive/tez/metadata_only_queries_with_filters.q.out new file mode 100644 index 0000000..62c9cc3 --- /dev/null +++ ql/src/test/results/clientpositive/tez/metadata_only_queries_with_filters.q.out @@ -0,0 +1,196 @@ +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: create table stats_tbl_part( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) partitioned by (dt int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table stats_tbl_part( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) partitioned by (dt int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_tbl_part +PREHOOK: query: from over10k +insert overwrite table stats_tbl_part partition (dt=2010) select t,si,i,b,f,d,bo,s,ts,dec,bin where t>0 and t<30 +insert overwrite table stats_tbl_part partition (dt=2014) select t,si,i,b,f,d,bo,s,ts,dec,bin where t > 30 and t<60 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +PREHOOK: Output: default@stats_tbl_part@dt=2010 +PREHOOK: Output: default@stats_tbl_part@dt=2014 +POSTHOOK: query: from over10k +insert overwrite table stats_tbl_part partition (dt=2010) select t,si,i,b,f,d,bo,s,ts,dec,bin where t>0 and t<30 +insert overwrite table stats_tbl_part partition (dt=2014) select t,si,i,b,f,d,bo,s,ts,dec,bin where t > 30 and t<60 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +POSTHOOK: Output: default@stats_tbl_part@dt=2010 +POSTHOOK: Output: default@stats_tbl_part@dt=2014 +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).bo SIMPLE [(over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).d SIMPLE [(over10k)over10k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).dec SIMPLE [(over10k)over10k.FieldSchema(name:dec, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).f SIMPLE [(over10k)over10k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).i SIMPLE 
[(over10k)over10k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).s SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2010).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).bo SIMPLE [(over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).d SIMPLE [(over10k)over10k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).dec SIMPLE [(over10k)over10k.FieldSchema(name:dec, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).f SIMPLE [(over10k)over10k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).i SIMPLE [(over10k)over10k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).s SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: analyze table stats_tbl_part partition(dt) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +PREHOOK: Input: default@stats_tbl_part@dt=2010 +PREHOOK: Input: default@stats_tbl_part@dt=2014 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2010 +PREHOOK: Output: default@stats_tbl_part@dt=2014 +POSTHOOK: query: analyze table stats_tbl_part partition(dt) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +POSTHOOK: Input: default@stats_tbl_part@dt=2010 +POSTHOOK: Input: default@stats_tbl_part@dt=2014 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2010 +POSTHOOK: Output: default@stats_tbl_part@dt=2014 +PREHOOK: query: analyze table stats_tbl_part partition(dt=2010) compute statistics for columns t,si,i,b,f,d,bo,s,bin +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +PREHOOK: Input: default@stats_tbl_part@dt=2010 +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_tbl_part partition(dt=2010) compute statistics for columns t,si,i,b,f,d,bo,s,bin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +POSTHOOK: Input: default@stats_tbl_part@dt=2010 +#### A masked pattern was here #### +PREHOOK: query: analyze table stats_tbl_part partition(dt=2014) compute statistics for columns t,si,i,b,f,d,bo,s,bin +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +PREHOOK: Input: 
default@stats_tbl_part@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_tbl_part partition(dt=2014) compute statistics for columns t,si,i,b,f,d,bo,s,bin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +POSTHOOK: Input: default@stats_tbl_part@dt=2014 +#### A masked pattern was here #### +PREHOOK: query: explain +select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: query: select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 +POSTHOOK: type: QUERY +#### A masked pattern was here #### +2322 2322 2322 2322 2322 2322 2322 65791 4294967296 99.9800033569336 0.03 +PREHOOK: query: explain +select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: query: select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 +POSTHOOK: type: QUERY +#### A masked pattern was here #### +2219 2219 2219 4438 2219 2219 2219 2219 65791 4294967296 99.95999908447266 0.04 +PREHOOK: query: drop table stats_tbl_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part +POSTHOOK: query: drop table stats_tbl_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part diff --git ql/src/test/results/clientpositive/tez/metadataonly1.q.out ql/src/test/results/clientpositive/tez/metadataonly1.q.out new file mode 100644 index 0000000..1ba9010 --- /dev/null +++ ql/src/test/results/clientpositive/tez/metadataonly1.q.out @@ -0,0 +1,2035 @@ +PREHOOK: query: CREATE TABLE TEST1(A INT, B DOUBLE) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE TEST1(A INT, B DOUBLE) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TEST1 +PREHOOK: query: explain extended select max(ds) from TEST1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select max(ds) from TEST1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX 
TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(ds) from TEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +#### A masked pattern was here #### +POSTHOOK: query: select max(ds) from TEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +#### A masked pattern was here #### +NULL +PREHOOK: query: alter table TEST1 add partition (ds='1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@test1 +POSTHOOK: query: alter table TEST1 add partition (ds='1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@test1 +POSTHOOK: Output: default@test1@ds=1 +PREHOOK: query: explain extended select max(ds) from TEST1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select max(ds) from TEST1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: max(ds) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string) + Path -> Alias: + -mr-10002default.test1{ds=1} [test1] + Path -> Partition: + 
-mr-10002default.test1{ds=1} + Partition + base file name: ds=1 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + -mr-10002default.test1{ds=1} [test1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(ds) from TEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select max(ds) from TEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@ds=1 +#### A masked pattern was here #### +1 +PREHOOK: query: explain extended select count(distinct ds) from TEST1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(distinct ds) from TEST1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + 
Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT ds) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + tag: -1 + Path -> Alias: + -mr-10002default.test1{ds=1} [test1] + Path -> Partition: + -mr-10002default.test1{ds=1} + Partition + base file name: ds=1 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + -mr-10002default.test1{ds=1} [test1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct ds) from TEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@ds=1 +#### A masked pattern was here 
#### +POSTHOOK: query: select count(distinct ds) from TEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@ds=1 +#### A masked pattern was here #### +1 +PREHOOK: query: explain extended select count(ds) from TEST1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(ds) from TEST1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + count + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(ds) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1/ds=1 [test1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + 
table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(ds) from TEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(ds) from TEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@ds=1 +#### A masked pattern was here #### +0 +PREHOOK: query: alter table TEST1 add partition (ds='2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@test1 +POSTHOOK: query: alter table TEST1 add partition (ds='2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@test1 +POSTHOOK: Output: default@test1@ds=2 +PREHOOK: query: explain extended +select count(*) from TEST1 a2 join (select max(ds) m from TEST1) b on a2.ds=b.m +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select count(*) from TEST1 a2 join (select max(ds) m from TEST1) b on a2.ds=b.m +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + TEST1 + a2 + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + m + b + = + . + TOK_TABLE_OR_COL + a2 + ds + . 
+ TOK_TABLE_OR_COL + b + m + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: max(ds) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string) + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 +#### A masked pattern was here #### + Partition + base file name: ds=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl 
struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1/ds=1 [test1] + /test1/ds=2 [test1] + Map 5 + Map Operator Tree: + TableScan + alias: a2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + tag: 0 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 +#### A masked pattern was here #### + Partition + base file name: ds=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1/ds=1 [a2] + /test1/ds=2 [a2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By 
Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + tag: 1 + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from TEST1 a2 join (select max(ds) m from TEST1) b on a2.ds=b.m +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@ds=1 +PREHOOK: Input: default@test1@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from TEST1 a2 join (select max(ds) m from TEST1) b on a2.ds=b.m +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@ds=1 +POSTHOOK: Input: default@test1@ds=2 +#### A masked pattern was here #### +0 +PREHOOK: query: CREATE TABLE TEST2(A INT, B DOUBLE) partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE TEST2(A INT, B DOUBLE) partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TEST2 +PREHOOK: query: alter table TEST2 add partition (ds='1', hr='1') +PREHOOK: type: ALTERTABLE_ADDPARTS 
+PREHOOK: Output: default@test2 +POSTHOOK: query: alter table TEST2 add partition (ds='1', hr='1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@test2 +POSTHOOK: Output: default@test2@ds=1/hr=1 +PREHOOK: query: alter table TEST2 add partition (ds='1', hr='2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@test2 +POSTHOOK: query: alter table TEST2 add partition (ds='1', hr='2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@test2 +POSTHOOK: Output: default@test2@ds=1/hr=2 +PREHOOK: query: alter table TEST2 add partition (ds='1', hr='3') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@test2 +POSTHOOK: query: alter table TEST2 add partition (ds='1', hr='3') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@test2 +POSTHOOK: Output: default@test2@ds=1/hr=3 +PREHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_TABLE_OR_COL + hr + TOK_GROUPBY + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string), hr (type: string) + outputColumnNames: ds, hr + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT hr) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + tag: -1 + Path -> Alias: + -mr-10002default.test2{ds=1, hr=1} [test2] + -mr-10003default.test2{ds=1, hr=2} [test2] + -mr-10004default.test2{ds=1, hr=3} [test2] + Path -> Partition: + -mr-10002default.test2{ds=1, hr=1} + Partition + base file name: hr=1 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + 
partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + -mr-10003default.test2{ds=1, hr=2} + Partition + base file name: hr=2 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 2 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + -mr-10004default.test2{ds=1, hr=3} + Partition + base file name: hr=3 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 3 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + Truncated Path -> Alias: + -mr-10002default.test2{ds=1, hr=1} [test2] + -mr-10003default.test2{ds=1, hr=2} [test2] + -mr-10004default.test2{ds=1, hr=3} [test2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 
(type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds, count(distinct hr) from TEST2 group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@test2 +PREHOOK: Input: default@test2@ds=1/hr=1 +PREHOOK: Input: default@test2@ds=1/hr=2 +PREHOOK: Input: default@test2@ds=1/hr=3 +#### A masked pattern was here #### +POSTHOOK: query: select ds, count(distinct hr) from TEST2 group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2 +POSTHOOK: Input: default@test2@ds=1/hr=1 +POSTHOOK: Input: default@test2@ds=1/hr=2 +POSTHOOK: Input: default@test2@ds=1/hr=3 +#### A masked pattern was here #### +1 3 +PREHOOK: query: explain extended select ds, count(hr) from TEST2 group by ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds, count(hr) from TEST2 group by ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_FUNCTION + count + TOK_TABLE_OR_COL + hr + TOK_GROUPBY + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string), hr (type: string) + outputColumnNames: ds, hr + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(hr) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: bigint) + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types 
string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 +#### A masked pattern was here #### + Partition + base file name: hr=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 2 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 +#### A masked pattern was here #### + Partition + base file name: hr=3 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 3 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.test2 + name: default.test2 + Truncated Path -> Alias: + /test2/ds=1/hr=1 [test2] + /test2/ds=1/hr=2 [test2] + /test2/ds=1/hr=3 [test2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds, count(hr) from TEST2 group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@test2 +PREHOOK: Input: default@test2@ds=1/hr=1 +PREHOOK: Input: default@test2@ds=1/hr=2 +PREHOOK: Input: default@test2@ds=1/hr=3 +#### A masked pattern was here #### +POSTHOOK: query: select ds, count(hr) from TEST2 group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2 +POSTHOOK: Input: default@test2@ds=1/hr=1 +POSTHOOK: Input: default@test2@ds=1/hr=2 +POSTHOOK: Input: default@test2@ds=1/hr=3 +#### A masked pattern was here #### +PREHOOK: query: explain extended select max(ds) from TEST1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select max(ds) from TEST1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: max(ds) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string) + Path -> Alias: + -mr-10002default.test1{ds=1} [test1] + -mr-10003default.test1{ds=2} [test1] + Path -> Partition: + -mr-10002default.test1{ds=1} + Partition + base file name: ds=1 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + -mr-10003default.test1{ds=2} + Partition + base file name: ds=2 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + -mr-10002default.test1{ds=1} [test1] + -mr-10003default.test1{ds=2} [test1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(ds) from TEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@ds=1 +PREHOOK: Input: default@test1@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select max(ds) from TEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@ds=1 +POSTHOOK: Input: default@test1@ds=2 +#### A masked pattern was here #### +2 +PREHOOK: query: select distinct ds from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select distinct ds from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 +2008-04-09 +PREHOOK: query: select min(ds),max(ds) from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select min(ds),max(ds) from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 2008-04-09 +PREHOOK: query: -- HIVE-3594 URI encoding for temporary path +alter table TEST2 add partition (ds='01:10:10', hr='01') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@test2 +POSTHOOK: query: -- HIVE-3594 URI encoding for temporary path +alter table TEST2 add partition (ds='01:10:10', hr='01') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@test2 +POSTHOOK: Output: default@test2@ds=01%3A10%3A10/hr=01 +PREHOOK: query: alter table TEST2 add partition (ds='01:10:20', hr='02') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@test2 +POSTHOOK: query: alter table TEST2 add partition (ds='01:10:20', hr='02') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@test2 +POSTHOOK: Output: default@test2@ds=01%3A10%3A20/hr=02 +PREHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_TABLE_OR_COL + hr + TOK_GROUPBY + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: ds (type: string), hr (type: string) + outputColumnNames: ds, hr + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT hr) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + tag: -1 + Path -> Alias: + -mr-10002default.test2{ds=01_10_10, hr=01} [test2] + -mr-10003default.test2{ds=01_10_20, hr=02} [test2] + -mr-10004default.test2{ds=1, hr=1} [test2] + -mr-10005default.test2{ds=1, hr=2} [test2] + -mr-10006default.test2{ds=1, hr=3} [test2] + Path -> Partition: + -mr-10002default.test2{ds=01_10_10, hr=01} + Partition + base file name: hr=01 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 01:10:10 + hr 01 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + -mr-10003default.test2{ds=01_10_20, hr=02} + Partition + base file name: hr=02 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 01:10:20 + hr 02 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + 
name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + -mr-10004default.test2{ds=1, hr=1} + Partition + base file name: hr=1 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + -mr-10005default.test2{ds=1, hr=2} + Partition + base file name: hr=2 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 2 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + -mr-10006default.test2{ds=1, hr=3} + Partition + base file name: hr=3 + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 3 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { 
i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct test2 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + Truncated Path -> Alias: + -mr-10002default.test2{ds=01_10_10, hr=01} [test2] + -mr-10003default.test2{ds=01_10_20, hr=02} [test2] + -mr-10004default.test2{ds=1, hr=1} [test2] + -mr-10005default.test2{ds=1, hr=2} [test2] + -mr-10006default.test2{ds=1, hr=3} [test2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds, count(distinct hr) from TEST2 group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@test2 +PREHOOK: Input: default@test2@ds=01%3A10%3A10/hr=01 +PREHOOK: Input: default@test2@ds=01%3A10%3A20/hr=02 +PREHOOK: Input: default@test2@ds=1/hr=1 +PREHOOK: Input: default@test2@ds=1/hr=2 +PREHOOK: Input: default@test2@ds=1/hr=3 +#### A masked pattern was here #### +POSTHOOK: query: select ds, count(distinct hr) from TEST2 group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2 +POSTHOOK: Input: default@test2@ds=01%3A10%3A10/hr=01 +POSTHOOK: Input: default@test2@ds=01%3A10%3A20/hr=02 +POSTHOOK: Input: default@test2@ds=1/hr=1 +POSTHOOK: Input: default@test2@ds=1/hr=2 +POSTHOOK: Input: default@test2@ds=1/hr=3 +#### A masked pattern was here #### +01:10:10 1 +01:10:20 1 +1 3