diff --git itests/qtest/pom.xml itests/qtest/pom.xml index 29988a6..ae97775 100644 --- itests/qtest/pom.xml +++ itests/qtest/pom.xml @@ -39,7 +39,7 @@ stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q - join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q + join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q 
add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index 597358a..0169077 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.stats.CounterStatsAggregator; +import org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez; import org.apache.hadoop.hive.ql.stats.StatsAggregator; import org.apache.hadoop.hive.ql.stats.StatsFactory; import 
org.apache.hadoop.hive.ql.stats.StatsPublisher; @@ -154,7 +155,8 @@ private int aggregateStats() { int maxPrefixLength = StatsFactory.getMaxPrefixLength(conf); // "counter" type does not need to collect stats per task - boolean counterStat = statsAggregator instanceof CounterStatsAggregator; + boolean counterStat = statsAggregator instanceof CounterStatsAggregator + || statsAggregator instanceof CounterStatsAggregatorTez; if (partitions == null) { org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable(); Map parameters = tTable.getParameters(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index c6f431c..f582400 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -49,6 +49,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.tez.client.TezSession; +import org.apache.tez.common.counters.CounterGroup; +import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.Edge; @@ -154,6 +156,14 @@ public int execute(DriverContext driverContext) { Set statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS); counters = client.getDAGStatus(statusGetOpts).getDAGCounters(); + if (LOG.isInfoEnabled()) { + for (CounterGroup group: counters) { + LOG.info(group.getDisplayName() +":"); + for (TezCounter counter: group) { + LOG.info(" "+counter.getDisplayName()+": "+counter.getValue()); + } + } + } } catch (Exception e) { LOG.error("Failed to execute tez graph.", e); // rc will be 1 at this point indicating failure. diff --git ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java index 12953af..3e1ef0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer; import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper; import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper; @@ -59,6 +60,7 @@ private long uncompressedFileSize = 0; private long rowNo = 0; private boolean exception = false; + private Reporter rp = null; public final static Log LOG = LogFactory.getLog("PartialScanMapper"); @@ -68,6 +70,7 @@ public PartialScanMapper() { @Override public void configure(JobConf job) { jc = job; + MapredContext.init(true, new JobConf(jc)); statsAggKeyPrefix = HiveConf.getVar(job, HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX); } @@ -77,6 +80,12 @@ public void configure(JobConf job) { public void map(Object k, RCFileValueBufferWrapper value, OutputCollector output, Reporter reporter) throws IOException { + + if (rp == null) { + this.rp = reporter; + MapredContext.get().setReporter(reporter); + } + try { //CombineHiveInputFormat is set in PartialScanTask. 
RCFileKeyBufferWrapper key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey(); @@ -114,6 +123,8 @@ public void close() throws IOException { } catch (HiveException e) { this.exception = true; throw new RuntimeException(e); + } finally { + MapredContext.close(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 9c80714..501b0c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -124,23 +124,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, if (currWork.getReduceWork() != null) { currWork.getReduceWork().setGatheringStats(true); } + // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, // and pass it to setTaskPlan as the last parameter - Set confirmedPartns = new HashSet(); - tableSpec tblSpec = parseInfo.getTableSpec(); - if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) { - // static partition - if (tblSpec.partHandle != null) { - confirmedPartns.add(tblSpec.partHandle); - } else { - // partial partition spec has null partHandle - assert parseInfo.isNoScanAnalyzeCommand(); - confirmedPartns.addAll(tblSpec.partitions); - } - } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) { - // dynamic partition - confirmedPartns.addAll(tblSpec.partitions); - } + Set confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo); if (confirmedPartns.size() > 0) { Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias); PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, false); @@ -174,25 +161,8 @@ private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx Task currTask, QBParseInfo parseInfo, StatsWork statsWork, Task statsTask) throws SemanticException { String aggregationKey = op.getConf().getStatsAggPrefix(); - List inputPaths = new ArrayList(); - switch (parseInfo.getTableSpec().specType) { - case TABLE_ONLY: - inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath()); - break; - case STATIC_PARTITION: - Partition part = parseInfo.getTableSpec().partHandle; - try { - aggregationKey += Warehouse.makePartPath(part.getSpec()); - } catch (MetaException e) { - throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg( - part.getDataLocation().toString() + e.getMessage())); - } - inputPaths.add(part.getDataLocation()); - break; - default: - assert false; - } - + List inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKey); + // scan work PartialScanWork scanWork = new PartialScanWork(inputPaths); scanWork.setMapperCannotSpanPartns(true); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 4ee3d6b..a629bdd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -34,6 +35,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.ErrorMsg; import 
org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; @@ -60,14 +62,18 @@ import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QBJoinTree; +import org.apache.hadoop.hive.ql.parse.QBParseInfo; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BaseWork; @@ -1720,6 +1726,48 @@ public static Path createMoveTask(Task currTask, boolean return dest; } + public static Set getConfirmedPartitionsForScan(QBParseInfo parseInfo) { + Set confirmedPartns = new HashSet(); + tableSpec tblSpec = parseInfo.getTableSpec(); + if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) { + // static partition + if (tblSpec.partHandle != null) { + confirmedPartns.add(tblSpec.partHandle); + } else { + // partial partition spec has null partHandle + assert parseInfo.isNoScanAnalyzeCommand(); + confirmedPartns.addAll(tblSpec.partitions); + } + } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) { + // dynamic partition + confirmedPartns.addAll(tblSpec.partitions); + } + return confirmedPartns; + } + + public static List getInputPathsForPartialScan(QBParseInfo parseInfo, String aggregationKey) + throws SemanticException { + List inputPaths = new ArrayList(); + switch (parseInfo.getTableSpec().specType) { + case TABLE_ONLY: + inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath()); + break; + case STATIC_PARTITION: + Partition part = parseInfo.getTableSpec().partHandle; + try { + aggregationKey += Warehouse.makePartPath(part.getSpec()); + } catch (MetaException e) { + throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg( + part.getDataLocation().toString() + e.getMessage())); + } + inputPaths.add(part.getDataLocation()); + break; + default: + assert false; + } + return inputPaths; + } + private GenMapRedUtils() { // prevent instantiation } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java new file mode 100644 index 0000000..042cb39 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; +import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType; + +/** + * GenTezUtils is a collection of shared helper methods to produce + * TezWork + */ +public class GenTezUtils { + + static final private Log LOG = LogFactory.getLog(GenTezUtils.class.getName()); + + // sequence number is used to name vertices (e.g.: Map 1, Reduce 14, ...) + private int sequenceNumber = 0; + + // singleton + private static GenTezUtils utils; + + public static GenTezUtils getUtils() { + if (utils == null) { + utils = new GenTezUtils(); + } + return utils; + } + + protected GenTezUtils() { + } + + public void resetSequenceNumber() { + sequenceNumber = 0; + } + + public ReduceWork createReduceWork(GenTezProcContext context, Operator root, TezWork tezWork) { + assert !root.getParentOperators().isEmpty(); + ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber)); + LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root); + reduceWork.setReducer(root); + reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork)); + + // All parents should be reduce sinks. We pick the one we just walked + // to choose the number of reducers. In the join/union case they will + // all be -1. In sort/order case where it matters there will be only + // one parent. 
+ assert context.parentOfRoot instanceof ReduceSinkOperator; + ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot; + + reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers()); + + setupReduceSink(context, reduceWork, reduceSink); + + tezWork.add(reduceWork); + tezWork.connect( + context.preceedingWork, + reduceWork, EdgeType.SIMPLE_EDGE); + + return reduceWork; + } + + protected void setupReduceSink(GenTezProcContext context, ReduceWork reduceWork, + ReduceSinkOperator reduceSink) { + + LOG.debug("Setting up reduce sink: " + reduceSink + + " with following reduce work: " + reduceWork.getName()); + + // need to fill in information about the key and value in the reducer + GenMapRedUtils.setKeyAndValueDesc(reduceWork, reduceSink); + + // remember which parent belongs to which tag + reduceWork.getTagToInput().put(reduceSink.getConf().getTag(), + context.preceedingWork.getName()); + + // remember the output name of the reduce sink + reduceSink.getConf().setOutputName(reduceWork.getName()); + } + + public MapWork createMapWork(GenTezProcContext context, Operator root, + TezWork tezWork, PrunedPartitionList partitions) throws SemanticException { + assert root.getParentOperators().isEmpty(); + MapWork mapWork = new MapWork("Map "+ (++sequenceNumber)); + LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root); + + // map work starts with table scan operators + assert root instanceof TableScanOperator; + String alias = ((TableScanOperator)root).getConf().getAlias(); + + setupMapWork(mapWork, context, partitions, root, alias); + + // add new item to the tez work + tezWork.add(mapWork); + + return mapWork; + } + + // this method's main use is to help unit testing this class + protected void setupMapWork(MapWork mapWork, GenTezProcContext context, + PrunedPartitionList partitions, Operator root, + String alias) throws SemanticException { + // All the setup is done in GenMapRedUtils + GenMapRedUtils.setMapWork(mapWork, context.parseContext, + context.inputs, partitions, root, alias, context.conf, false); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java index 8363bbf..475c940 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java @@ -49,8 +49,15 @@ static final private Log LOG = LogFactory.getLog(GenTezWork.class.getName()); - // sequence number is used to name vertices (e.g.: Map 1, Reduce 14, ...) 
- private int sequenceNumber = 0; + // instance of shared utils + private GenTezUtils utils = null; + + /** + * Constructor takes utils as parameter to facilitate testing + */ + public GenTezWork(GenTezUtils utils) { + this.utils = utils; + } @Override public Object process(Node nd, Stack stack, @@ -92,9 +99,9 @@ public Object process(Node nd, Stack stack, } else { // create a new vertex if (context.preceedingWork == null) { - work = createMapWork(context, root, tezWork); + work = utils.createMapWork(context, root, tezWork, null); } else { - work = createReduceWork(context, root, tezWork); + work = utils.createReduceWork(context, root, tezWork); } context.rootToWorkMap.put(root, work); } @@ -186,74 +193,4 @@ public Object process(Node nd, Stack stack, return null; } - - protected ReduceWork createReduceWork(GenTezProcContext context, Operator root, - TezWork tezWork) { - assert !root.getParentOperators().isEmpty(); - ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber)); - LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root); - reduceWork.setReducer(root); - reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork)); - - // All parents should be reduce sinks. We pick the one we just walked - // to choose the number of reducers. In the join/union case they will - // all be -1. In sort/order case where it matters there will be only - // one parent. - assert context.parentOfRoot instanceof ReduceSinkOperator; - ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot; - - reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers()); - - setupReduceSink(context, reduceWork, reduceSink); - - tezWork.add(reduceWork); - tezWork.connect( - context.preceedingWork, - reduceWork, EdgeType.SIMPLE_EDGE); - - return reduceWork; - } - - protected void setupReduceSink(GenTezProcContext context, ReduceWork reduceWork, - ReduceSinkOperator reduceSink) { - - LOG.debug("Setting up reduce sink: " + reduceSink - + " with following reduce work: " + reduceWork.getName()); - - // need to fill in information about the key and value in the reducer - GenMapRedUtils.setKeyAndValueDesc(reduceWork, reduceSink); - - // remember which parent belongs to which tag - reduceWork.getTagToInput().put(reduceSink.getConf().getTag(), - context.preceedingWork.getName()); - - // remember the output name of the reduce sink - reduceSink.getConf().setOutputName(reduceWork.getName()); - } - - protected MapWork createMapWork(GenTezProcContext context, Operator root, - TezWork tezWork) throws SemanticException { - assert root.getParentOperators().isEmpty(); - MapWork mapWork = new MapWork("Map "+ (++sequenceNumber)); - LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root); - - // map work starts with table scan operators - assert root instanceof TableScanOperator; - String alias = ((TableScanOperator)root).getConf().getAlias(); - - setupMapWork(mapWork, context, root, alias); - - // add new item to the tez work - tezWork.add(mapWork); - - return mapWork; - } - - // this method's main use is to help unit testing this class - protected void setupMapWork(MapWork mapWork, GenTezProcContext context, - Operator root, String alias) throws SemanticException { - // All the setup is done in GenMapRedUtils - GenMapRedUtils.setMapWork(mapWork, context.parseContext, - context.inputs, null, root, alias, context.conf, false); - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java 
ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java new file mode 100644 index 0000000..c02131d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.util.List; +import java.util.Set; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; +import org.apache.hadoop.hive.ql.parse.GenTezWork; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.QBParseInfo; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.plan.StatsWork; + +/** + * ProcessAnalyzeTable sets up work for the several variants of analyze table + * (normal, no scan, partial scan.) The plan at this point will be a single + * table scan operator. + */ +public class ProcessAnalyzeTable implements NodeProcessor { + + static final private Log LOG = LogFactory.getLog(ProcessAnalyzeTable.class.getName()); + + // shared plan utils for tez + private GenTezUtils utils = null; + + /** + * Injecting the utils in the constructor facilitates testing + */ + public ProcessAnalyzeTable(GenTezUtils utils) { + this.utils = utils; + } + + @SuppressWarnings("unchecked") + @Override + public Object process(Node nd, Stack stack, + NodeProcessorCtx procContext, Object... 
nodeOutputs) + throws SemanticException { + + GenTezProcContext context = (GenTezProcContext) procContext; + + TableScanOperator tableScan = (TableScanOperator) nd; + + ParseContext parseContext = context.parseContext; + QB queryBlock = parseContext.getQB(); + QBParseInfo parseInfo = parseContext.getQB().getParseInfo(); + + if (parseInfo.isAnalyzeCommand()) { + + assert tableScan.getChildOperators() == null + || tableScan.getChildOperators().size() == 0; + + String alias = null; + for (String a: parseContext.getTopOps().keySet()) { + if (tableScan == parseContext.getTopOps().get(a)) { + alias = a; + } + } + + assert alias != null; + + TezWork tezWork = context.currentTask.getWork(); + + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS; + // The plan consists of a simple TezTask followed by a StatsTask. + // The Tez task is just a simple TableScanOperator + + StatsWork statsWork = new StatsWork(parseInfo.getTableSpec()); + statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); + statsWork.setSourceTask(context.currentTask); + statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + Task statsTask = TaskFactory.get(statsWork, parseContext.getConf()); + context.currentTask.addDependentTask(statsTask); + + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; + // The plan consists of a StatsTask only. + if (parseInfo.isNoScanAnalyzeCommand()) { + statsTask.setParentTasks(null); + statsWork.setNoScanAnalyzeCommand(true); + context.rootTasks.remove(context.currentTask); + context.rootTasks.add(statsTask); + } + + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; + if (parseInfo.isPartialScanAnalyzeCommand()) { + handlePartialScanCommand(tableScan, parseContext, parseInfo, statsWork, context, statsTask); + } + + // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, + // and pass it to setTaskPlan as the last parameter + Set confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo); + PrunedPartitionList partitions = null; + if (confirmedPartns.size() > 0) { + Table source = queryBlock.getMetaData().getTableForAlias(alias); + partitions = new PrunedPartitionList(source, confirmedPartns, false); + } + + MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions); + w.setGatheringStats(true); + + return true; + } + + return null; + } + + /** + * handle partial scan command. + * + * It is composed of PartialScanTask followed by StatsTask. 
+ */ + private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, + QBParseInfo parseInfo, StatsWork statsWork, GenTezProcContext context, + Task statsTask) throws SemanticException { + + String aggregationKey = tableScan.getConf().getStatsAggPrefix(); + List inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKey); + + // scan work + PartialScanWork scanWork = new PartialScanWork(inputPaths); + scanWork.setMapperCannotSpanPartns(true); + scanWork.setAggKey(aggregationKey); + + // stats work + statsWork.setPartialScanAnalyzeCommand(true); + + // partial scan task + DriverContext driverCxt = new DriverContext(); + Task partialScanTask = TaskFactory.get(scanWork, parseContext.getConf()); + partialScanTask.initialize(parseContext.getConf(), null, driverCxt); + partialScanTask.setWork(scanWork); + statsWork.setSourceTask(partialScanTask); + + // task dependency + context.rootTasks.remove(context.currentTask); + context.rootTasks.add(partialScanTask); + partialScanTask.addDependentTask(statsTask); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java index 6a415c5..af8e1da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java @@ -35,8 +35,7 @@ private TaskCompilerFactory() { * into executable units. */ public static TaskCompiler getCompiler(HiveConf conf, ParseContext parseContext) { - if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") - && !parseContext.getQB().getParseInfo().isAnalyzeCommand()) { + if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { return new TezCompiler(); } else { return new MapReduceCompiler(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index dff743f..b7738c5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -109,8 +110,10 @@ protected void generateTaskTree(List> rootTasks, Pa List> mvTask, Set inputs, Set outputs) throws SemanticException { + GenTezUtils.getUtils().resetSequenceNumber(); + ParseContext tempParseContext = getParseContext(pCtx, rootTasks); - GenTezWork genTezWork = new GenTezWork(); + GenTezWork genTezWork = new GenTezWork(GenTezUtils.getUtils()); GenTezProcContext procCtx = new GenTezProcContext( conf, tempParseContext, mvTask, rootTasks, inputs, outputs); @@ -131,6 +134,10 @@ protected void generateTaskTree(List> rootTasks, Pa FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork)); + opRules.put(new RuleRegExp("Handle Potential Analyze Command", + TableScanOperator.getOperatorName() + "%"), + new ProcessAnalyzeTable(GenTezUtils.getUtils())); + opRules.put(new RuleRegExp("Bail on Union", UnionOperator.getOperatorName() + "%"), new NodeProcessor() { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java 
ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java index d63406e..ef49801 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java @@ -57,17 +57,21 @@ public boolean connect(Configuration hconf, Task sourceTask) { @Override public String aggregateStats(String keyPrefix, String statType) { - if (delegate) { - return mrAggregator.aggregateStats(keyPrefix, statType); - } + String result; - long value = 0; - for (String groupName : counters.getGroupNames()) { - if (groupName.startsWith(keyPrefix)) { - value += counters.getGroup(groupName).findCounter(statType).getValue(); + if (delegate) { + result = mrAggregator.aggregateStats(keyPrefix, statType); + } else { + long value = 0; + for (String groupName : counters.getGroupNames()) { + if (groupName.startsWith(keyPrefix)) { + value += counters.getGroup(groupName).findCounter(statType).getValue(); + } } + result = String.valueOf(value); } - return String.valueOf(value); + LOG.info("Counter based stats for ("+keyPrefix+") are: "+result); + return result; } @Override diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java index 01583c7..5871ad7 100644 --- ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java @@ -76,17 +76,19 @@ public void setUp() throws Exception { (Set)Collections.EMPTY_SET, (Set)Collections.EMPTY_SET); - proc = new GenTezWork() { + proc = new GenTezWork(new GenTezUtils() { @Override - protected void setupMapWork(MapWork mapWork, GenTezProcContext context, - Operator root, String alias) throws SemanticException { + protected void setupMapWork(MapWork mapWork, GenTezProcContext context, + PrunedPartitionList partitions, Operator root, String alias) + throws SemanticException { + LinkedHashMap> map = new LinkedHashMap>(); map.put("foo", root); mapWork.setAliasToWork(map); return; } - }; + }); fs = new FileSinkOperator(); fs.setConf(new FileSinkDesc()); diff --git ql/src/test/results/clientpositive/tez/ctas.q.out ql/src/test/results/clientpositive/tez/ctas.q.out index 21e5ec7..e7ab86c 100644 --- ql/src/test/results/clientpositive/tez/ctas.q.out +++ ql/src/test/results/clientpositive/tez/ctas.q.out @@ -398,8 +398,8 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE true numFiles 1 - numRows 0 - rawDataSize 0 + numRows 10 + rawDataSize 96 totalSize 106 #### A masked pattern was here #### @@ -576,8 +576,8 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE true numFiles 1 - numRows 0 - rawDataSize 0 + numRows 10 + rawDataSize 120 totalSize 199 #### A masked pattern was here #### @@ -642,8 +642,8 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE true numFiles 1 - numRows 0 - rawDataSize 0 + numRows 10 + rawDataSize 120 totalSize 199 #### A masked pattern was here #### @@ -842,8 +842,8 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE true numFiles 1 - numRows 0 - rawDataSize 0 + numRows 10 + rawDataSize 96 totalSize 106 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out index 4f7d320..03c4780 100644 --- ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out @@ -42,7 
+42,7 @@ STAGE PLANS: TableScan alias: f Statistics: - numRows: 59 dataSize: 236 basicStatsState: COMPLETE colStatsState: NONE + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false @@ -50,7 +50,7 @@ STAGE PLANS: expr: key is not null type: boolean Statistics: - numRows: 30 dataSize: 120 basicStatsState: COMPLETE colStatsState: NONE + numRows: 13 dataSize: 109 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -60,7 +60,7 @@ STAGE PLANS: expr: key type: int Statistics: - numRows: 30 dataSize: 120 basicStatsState: COMPLETE colStatsState: NONE + numRows: 13 dataSize: 109 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -82,9 +82,9 @@ STAGE PLANS: #### A masked pattern was here #### name default.filter_join_breaktask numFiles 1 - numRows 0 + numRows 25 partition_columns ds - rawDataSize 0 + rawDataSize 211 serialization.ddl struct filter_join_breaktask { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -115,7 +115,7 @@ STAGE PLANS: TableScan alias: m Statistics: - numRows: 2 dataSize: 236 basicStatsState: COMPLETE colStatsState: NONE + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false @@ -123,7 +123,7 @@ STAGE PLANS: expr: ((key is not null and value is not null) and (value <> '')) type: boolean Statistics: - numRows: 1 dataSize: 118 basicStatsState: COMPLETE colStatsState: NONE + numRows: 7 dataSize: 59 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -133,7 +133,7 @@ STAGE PLANS: expr: key type: int Statistics: - numRows: 1 dataSize: 118 basicStatsState: COMPLETE colStatsState: NONE + numRows: 7 dataSize: 59 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: value @@ -155,9 +155,9 @@ STAGE PLANS: #### A masked pattern was here #### name default.filter_join_breaktask numFiles 1 - numRows 0 + numRows 25 partition_columns ds - rawDataSize 0 + rawDataSize 211 serialization.ddl struct filter_join_breaktask { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -194,7 +194,7 @@ STAGE PLANS: handleSkewJoin: false outputColumnNames: _col0, _col6 Statistics: - numRows: 33 dataSize: 132 basicStatsState: COMPLETE colStatsState: NONE + numRows: 14 dataSize: 119 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col6 @@ -204,7 +204,7 @@ STAGE PLANS: expr: _col6 type: string Statistics: - numRows: 33 dataSize: 132 basicStatsState: COMPLETE colStatsState: NONE + numRows: 14 dataSize: 119 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: _col0 @@ -214,7 +214,7 @@ STAGE PLANS: TableScan alias: g Statistics: - numRows: 2 dataSize: 236 basicStatsState: COMPLETE colStatsState: NONE + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false @@ -222,7 +222,7 @@ STAGE PLANS: expr: (value <> '') type: boolean Statistics: - numRows: 2 dataSize: 236 basicStatsState: COMPLETE colStatsState: NONE + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: value @@ -232,7 +232,7 @@ STAGE PLANS: expr: value type: string Statistics: - numRows: 2 dataSize: 236 basicStatsState: 
COMPLETE colStatsState: NONE + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: value @@ -254,9 +254,9 @@ STAGE PLANS: #### A masked pattern was here #### name default.filter_join_breaktask numFiles 1 - numRows 0 + numRows 25 partition_columns ds - rawDataSize 0 + rawDataSize 211 serialization.ddl struct filter_join_breaktask { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -293,7 +293,7 @@ STAGE PLANS: handleSkewJoin: false outputColumnNames: _col0, _col11 Statistics: - numRows: 36 dataSize: 145 basicStatsState: COMPLETE colStatsState: NONE + numRows: 27 dataSize: 232 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -302,14 +302,14 @@ STAGE PLANS: type: string outputColumnNames: _col0, _col1 Statistics: - numRows: 36 dataSize: 145 basicStatsState: COMPLETE colStatsState: NONE + numRows: 27 dataSize: 232 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: - numRows: 36 dataSize: 145 basicStatsState: COMPLETE colStatsState: NONE + numRows: 27 dataSize: 232 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out index 2295312..3c69ced 100644 --- ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out @@ -837,144 +837,18 @@ ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) STAGE DEPENDENCIES: - Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Alias -> Map Operator Tree: - stats_tbl - TableScan - alias: stats_tbl - Select Operator - expressions: - expr: s - type: string - expr: bo - type: boolean - expr: bin - type: binary - expr: si - type: smallint - expr: i - type: int - expr: b - type: bigint - expr: f - type: float - expr: d - type: double - outputColumnNames: s, bo, bin, si, i, b, f, d - Group By Operator - aggregations: - expr: count() - expr: sum(1) - expr: sum(0.2) - expr: count(1) - expr: count(s) - expr: count(bo) - expr: count(bin) - expr: count(si) - expr: max(i) - expr: min(b) - expr: max(f) - expr: min(d) - bucketGroup: false - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - 
type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: int - expr: _col9 - type: bigint - expr: _col10 - type: float - expr: _col11 - type: double - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: count(VALUE._col3) - expr: count(VALUE._col4) - expr: count(VALUE._col5) - expr: count(VALUE._col6) - expr: count(VALUE._col7) - expr: max(VALUE._col8) - expr: min(VALUE._col9) - expr: max(VALUE._col10) - expr: min(VALUE._col11) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: int - expr: _col9 - type: bigint - expr: _col10 - type: float - expr: _col11 - type: double - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl PREHOOK: type: QUERY -PREHOOK: Input: default@stats_tbl #### A masked pattern was here #### POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl POSTHOOK: type: QUERY -POSTHOOK: Input: default@stats_tbl #### A masked pattern was here #### POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] @@ -1020,7 +894,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. 
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -9999 9999 1999.8000000003176 9999 9999 9999 9999 9999 65791 4294967296 99.98 0.01 +9999 9999 1999.8 9999 9999 9999 9999 9999 65791 0 99.9800033569336 0.0 PREHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part PREHOOK: type: QUERY @@ -1075,150 +949,18 @@ ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) STAGE DEPENDENCIES: - Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Alias -> Map Operator Tree: - stats_tbl_part - TableScan - alias: stats_tbl_part - Select Operator - expressions: - expr: s - type: string - expr: bo - type: boolean - expr: bin - type: binary - expr: si - type: smallint - expr: i - type: int - expr: b - type: bigint - expr: f - type: float - expr: d - type: double - outputColumnNames: s, bo, bin, si, i, b, f, d - Group By Operator - aggregations: - expr: count() - expr: sum(1) - expr: sum(0.2) - expr: count(1) - expr: count(s) - expr: count(bo) - expr: count(bin) - expr: count(si) - expr: max(i) - expr: min(b) - expr: max(f) - expr: min(d) - bucketGroup: false - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: int - expr: _col9 - type: bigint - expr: _col10 - type: float - expr: _col11 - type: double - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: count(VALUE._col3) - expr: count(VALUE._col4) - expr: count(VALUE._col5) - expr: count(VALUE._col6) - expr: count(VALUE._col7) - expr: max(VALUE._col8) - expr: min(VALUE._col9) - expr: max(VALUE._col10) - expr: min(VALUE._col11) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: 
bigint - expr: _col8 - type: int - expr: _col9 - type: bigint - expr: _col10 - type: float - expr: _col11 - type: double - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part PREHOOK: type: QUERY -PREHOOK: Input: default@stats_tbl_part -PREHOOK: Input: default@stats_tbl_part@dt=2010 -PREHOOK: Input: default@stats_tbl_part@dt=2011 -PREHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part POSTHOOK: type: QUERY -POSTHOOK: Input: default@stats_tbl_part -POSTHOOK: Input: default@stats_tbl_part@dt=2010 -POSTHOOK: Input: default@stats_tbl_part@dt=2011 -POSTHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] @@ -1264,7 +1006,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -9489 9489 1897.8000000002944 9489 9489 9489 9489 9489 65791 4294967296 99.98 0.01 +9489 9489 1897.8 9489 9489 9489 9489 9489 65791 0 99.9800033569336 0.0 PREHOOK: query: explain select count(ts) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain select count(ts) from stats_tbl_part diff --git ql/src/test/results/clientpositive/tez/stats_counter.q.out ql/src/test/results/clientpositive/tez/stats_counter.q.out new file mode 100644 index 0000000..40d8656 --- /dev/null +++ ql/src/test/results/clientpositive/tez/stats_counter.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: -- by analyze +create table dummy1 as select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: -- by analyze +create table dummy1 as select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dummy1 +PREHOOK: query: analyze table dummy1 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy1 +PREHOOK: Output: default@dummy1 +POSTHOOK: query: analyze table dummy1 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy1 +POSTHOOK: Output: default@dummy1 +PREHOOK: query: desc formatted dummy1 +PREHOOK: type: DESCTABLE +POSTHOOK: query: desc formatted dummy1 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None 
+Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- by autogather +create table dummy2 as select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: -- by autogather +create table dummy2 as select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dummy2 +PREHOOK: query: desc formatted dummy2 +PREHOOK: type: DESCTABLE +POSTHOOK: query: desc formatted dummy2 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 diff --git ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out new file mode 100644 index 0000000..62daf36 --- /dev/null +++ ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out @@ -0,0 +1,525 @@ +PREHOOK: query: -- partitioned table analyze + +create table dummy (key string, value string) partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- partitioned table analyze + +create table dummy (key string, value string) partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dummy +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') +PREHOOK: type: LOAD +PREHOOK: Output: default@dummy +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dummy +POSTHOOK: Output: default@dummy@ds=2008/hr=12 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') +PREHOOK: type: LOAD +PREHOOK: Output: default@dummy +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dummy +POSTHOOK: Output: default@dummy@ds=2008/hr=11 +PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Input: default@dummy@ds=2008/hr=11 +PREHOOK: Input: default@dummy@ds=2008/hr=12 +PREHOOK: Output: default@dummy +PREHOOK: Output: default@dummy@ds=2008/hr=11 +PREHOOK: Output: default@dummy@ds=2008/hr=12 +POSTHOOK: query: 
analyze table dummy partition (ds,hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Input: default@dummy@ds=2008/hr=11 +POSTHOOK: Input: default@dummy@ds=2008/hr=12 +POSTHOOK: Output: default@dummy +POSTHOOK: Output: default@dummy@ds=2008/hr=11 +POSTHOOK: Output: default@dummy@ds=2008/hr=12 +PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11') +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11') +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008, 11] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12') +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12') +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008, 12] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table dummy +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dummy +PREHOOK: Output: default@dummy +POSTHOOK: query: drop table dummy +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@dummy +PREHOOK: query: -- static partitioned table on insert + +create table dummy (key string, value string) partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- static partitioned table on insert + +create table dummy (key string, value string) partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dummy +PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dummy@ds=10/hr=11 +POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dummy@ds=10/hr=11 +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: 
Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dummy@ds=10/hr=12 +POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dummy@ds=10/hr=12 +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted dummy partition (ds='10', hr='11') +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (ds='10', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [10, 11] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (ds='10', hr='12') +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (ds='10', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [10, 12] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect 
Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table dummy +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dummy +PREHOOK: Output: default@dummy +POSTHOOK: query: drop table dummy +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@dummy +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- dynamic partitioned table on insert + +create table dummy (key int) partitioned by (hr int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- dynamic partitioned table on insert + +create table dummy (key int) partitioned by (hr int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dummy +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +PREHOOK: type: LOAD +PREHOOK: Output: default@tbl +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +POSTHOOK: type: LOAD +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table dummy partition (hr) select * from tbl +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +PREHOOK: Output: default@dummy +POSTHOOK: query: insert overwrite table dummy partition (hr) select * from tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@dummy@hr=1994 +POSTHOOK: Output: default@dummy@hr=1996 +POSTHOOK: Output: default@dummy@hr=1997 +POSTHOOK: Output: default@dummy@hr=1998 +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: describe formatted dummy partition (hr=1997) +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (hr=1997) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +# col_name data_type comment + +key int None + +# Partition Information +# col_name data_type comment + +hr int None + +# Detailed Partition Information +Partition Value: [1997] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 6 + rawDataSize 6 + totalSize 12 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe 
formatted dummy partition (hr=1994) +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (hr=1994) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +# col_name data_type comment + +key int None + +# Partition Information +# col_name data_type comment + +hr int None + +# Detailed Partition Information +Partition Value: [1994] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 1 + totalSize 2 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (hr=1998) +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (hr=1998) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +# col_name data_type comment + +key int None + +# Partition Information +# col_name data_type comment + +hr int None + +# Detailed Partition Information +Partition Value: [1998] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 2 + rawDataSize 2 + totalSize 4 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (hr=1996) +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted dummy partition (hr=1996) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +# col_name data_type comment + +key int None + +# Partition Information +# col_name data_type comment + +hr int None + +# Detailed Partition Information +Partition Value: [1996] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 1 + totalSize 2 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl +PREHOOK: Output: default@tbl +POSTHOOK: query: drop table tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: drop table dummy +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dummy +PREHOOK: Output: 
default@dummy +POSTHOOK: query: drop table dummy +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@dummy +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] diff --git ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out new file mode 100644 index 0000000..f05f232 --- /dev/null +++ ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out @@ -0,0 +1,719 @@ +PREHOOK: query: -- test analyze table ... compute statistics noscan + +-- 1. test full spec +create table analyze_srcpart like srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- test analyze table ... compute statistics noscan + +-- 1. test full spec +create table analyze_srcpart like srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@analyze_srcpart +PREHOOK: query: insert overwrite table analyze_srcpart partition (ds, hr) select * from srcpart where ds is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@analyze_srcpart +POSTHOOK: query: insert overwrite table analyze_srcpart partition (ds, hr) select * from srcpart where ds is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@analyze_srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@analyze_srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@analyze_srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@analyze_srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics noscan +PREHOOK: type: QUERY +POSTHOOK: query: explain +analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr 11))) noscan) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: default@analyze_srcpart +PREHOOK: Input: default@analyze_srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@analyze_srcpart +PREHOOK: Output: default@analyze_srcpart@ds=2008-04-08/hr=11 +POSTHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@analyze_srcpart +POSTHOOK: Input: default@analyze_srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@analyze_srcpart +POSTHOOK: Output: default@analyze_srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: 
Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=12) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: default@analyze_srcpart +PREHOOK: Input: default@analyze_srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@analyze_srcpart +PREHOOK: Output: default@analyze_srcpart@ds=2008-04-08/hr=12 +POSTHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=12) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@analyze_srcpart +POSTHOOK: Input: default@analyze_srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@analyze_srcpart +POSTHOOK: Output: default@analyze_srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- confirm result +describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=11) +PREHOOK: type: DESCTABLE +POSTHOOK: query: -- confirm result +describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=11) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: analyze_srcpart +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=12) +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=12) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: analyze_srcpart +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort 
Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-09',hr=11) +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-09',hr=11) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-09, 11] +Database: default +Table: analyze_srcpart +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE false + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-09',hr=12) +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-09',hr=12) +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart 
PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-09, 12] +Database: default +Table: analyze_srcpart +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE false + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted analyze_srcpart +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted analyze_srcpart +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table analyze_srcpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@analyze_srcpart +PREHOOK: Output: default@analyze_srcpart +POSTHOOK: query: drop table analyze_srcpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@analyze_srcpart +POSTHOOK: Output: 
default@analyze_srcpart +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- 2. test partial spec +create table analyze_srcpart_partial like srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- 2. test partial spec +create table analyze_srcpart_partial like srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@analyze_srcpart_partial +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table analyze_srcpart_partial partition (ds, hr) select * from srcpart where ds is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@analyze_srcpart_partial +POSTHOOK: query: insert overwrite table analyze_srcpart_partial partition (ds, hr) select * from srcpart where ds is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +analyze table analyze_srcpart_partial PARTITION(ds='2008-04-08') compute statistics noscan +PREHOOK: type: QUERY +POSTHOOK: query: explain +analyze table analyze_srcpart_partial PARTITION(ds='2008-04-08') compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart_partial) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08'))) noscan) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: analyze table analyze_srcpart_partial PARTITION(ds='2008-04-08') compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: default@analyze_srcpart_partial +PREHOOK: Input: default@analyze_srcpart_partial@ds=2008-04-08/hr=11 +PREHOOK: Input: default@analyze_srcpart_partial@ds=2008-04-08/hr=12 +PREHOOK: Output: default@analyze_srcpart_partial +PREHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-08/hr=11 +PREHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-08/hr=12 +POSTHOOK: query: analyze table analyze_srcpart_partial PARTITION(ds='2008-04-08') compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@analyze_srcpart_partial +POSTHOOK: Input: default@analyze_srcpart_partial@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@analyze_srcpart_partial@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@analyze_srcpart_partial +POSTHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@analyze_srcpart_partial@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: analyze_srcpart 
PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- confirm result
+describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=11)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: -- confirm result
+describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=11)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: analyze_srcpart_partial
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows -1
+ rawDataSize -1
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=12)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=12)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: analyze_srcpart_partial
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows -1
+ rawDataSize -1
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-09',hr=11)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-09',hr=11)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-09, 11]
+Database: default
+Table: analyze_srcpart_partial
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE false
+ numFiles 1
+ numRows -1
+ rawDataSize -1
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-09',hr=12)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-09',hr=12)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-09, 12]
+Database: default
+Table: analyze_srcpart_partial
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE false
+ numFiles 1
+ numRows -1
+ rawDataSize -1
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table analyze_srcpart_partial
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@analyze_srcpart_partial
+PREHOOK: Output: default@analyze_srcpart_partial
+POSTHOOK: query: drop table analyze_srcpart_partial
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@analyze_srcpart_partial
+POSTHOOK: Output: default@analyze_srcpart_partial
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]