Index: itests/qtest/pom.xml
===================================================================
--- itests/qtest/pom.xml (revision 1548014)
+++ itests/qtest/pom.xml (working copy)
@@ -36,7 +36,7 @@
false
false
- list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q
+ stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q
cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q
add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
Index: ql/src/test/results/clientpositive/stats13.q.out
===================================================================
--- ql/src/test/results/clientpositive/stats13.q.out (revision 1548014)
+++ ql/src/test/results/clientpositive/stats13.q.out (working copy)
@@ -60,7 +60,7 @@
alias: analyze_srcpart
Statistics:
numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: COMPLETE
- Statistics Aggregation Key Prefix: analyze_srcpart/
+ Statistics Aggregation Key Prefix: default.analyze_srcpart/
GatherStats: true
Path -> Alias:
#### A masked pattern was here ####
@@ -112,7 +112,7 @@
Stage: Stage-1
Stats-Aggr Operator
- Stats Aggregation Key Prefix: analyze_srcpart/
+ Stats Aggregation Key Prefix: default.analyze_srcpart/
PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics
Index: ql/src/test/results/clientpositive/stats12.q.out
===================================================================
--- ql/src/test/results/clientpositive/stats12.q.out (revision 1548014)
+++ ql/src/test/results/clientpositive/stats12.q.out (working copy)
@@ -60,7 +60,7 @@
alias: analyze_srcpart
Statistics:
numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: COMPLETE
- Statistics Aggregation Key Prefix: analyze_srcpart/
+ Statistics Aggregation Key Prefix: default.analyze_srcpart/
GatherStats: true
Path -> Alias:
#### A masked pattern was here ####
@@ -155,7 +155,7 @@
Stage: Stage-1
Stats-Aggr Operator
- Stats Aggregation Key Prefix: analyze_srcpart/
+ Stats Aggregation Key Prefix: default.analyze_srcpart/
PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr) compute statistics
Index: ql/src/test/results/clientpositive/stats_counter_partitioned.q.out
===================================================================
--- ql/src/test/results/clientpositive/stats_counter_partitioned.q.out (revision 0)
+++ ql/src/test/results/clientpositive/stats_counter_partitioned.q.out (revision 0)
@@ -0,0 +1,525 @@
+PREHOOK: query: -- partitioned table analyze
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- partitioned table analyze
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dummy
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dummy
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dummy
+POSTHOOK: Output: default@dummy@ds=2008/hr=12
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dummy
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dummy
+POSTHOOK: Output: default@dummy@ds=2008/hr=11
+PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Input: default@dummy@ds=2008/hr=11
+PREHOOK: Input: default@dummy@ds=2008/hr=12
+PREHOOK: Output: default@dummy
+PREHOOK: Output: default@dummy@ds=2008/hr=11
+PREHOOK: Output: default@dummy@ds=2008/hr=12
+POSTHOOK: query: analyze table dummy partition (ds,hr) compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Input: default@dummy@ds=2008/hr=11
+POSTHOOK: Input: default@dummy@ds=2008/hr=12
+POSTHOOK: Output: default@dummy
+POSTHOOK: Output: default@dummy@ds=2008/hr=11
+POSTHOOK: Output: default@dummy@ds=2008/hr=12
+PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
+POSTHOOK: type: DESCTABLE
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008, 11]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
+POSTHOOK: type: DESCTABLE
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008, 12]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table dummy
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@dummy
+POSTHOOK: query: drop table dummy
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@dummy
+PREHOOK: query: -- static partitioned table on insert
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- static partitioned table on insert
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dummy
+PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dummy@ds=10/hr=11
+POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dummy@ds=10/hr=11
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dummy@ds=10/hr=12
+POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dummy@ds=10/hr=12
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted dummy partition (ds='10', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (ds='10', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [10, 11]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (ds='10', hr='12')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (ds='10', hr='12')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [10, 12]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table dummy
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@dummy
+POSTHOOK: query: drop table dummy
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@dummy
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- dynamic partitioned table on insert
+
+create table dummy (key int) partitioned by (hr int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- dynamic partitioned table on insert
+
+create table dummy (key int) partitioned by (hr int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dummy
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tbl
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl
+PREHOOK: type: LOAD
+PREHOOK: Output: default@tbl
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@tbl
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table dummy partition (hr) select * from tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+PREHOOK: Output: default@dummy
+POSTHOOK: query: insert overwrite table dummy partition (hr) select * from tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+POSTHOOK: Output: default@dummy@hr=1994
+POSTHOOK: Output: default@dummy@hr=1996
+POSTHOOK: Output: default@dummy@hr=1997
+POSTHOOK: Output: default@dummy@hr=1998
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: describe formatted dummy partition (hr=1997)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (hr=1997)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+# col_name data_type comment
+
+key int None
+
+# Partition Information
+# col_name data_type comment
+
+hr int None
+
+# Detailed Partition Information
+Partition Value: [1997]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 6
+ rawDataSize 6
+ totalSize 12
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (hr=1994)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (hr=1994)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+# col_name data_type comment
+
+key int None
+
+# Partition Information
+# col_name data_type comment
+
+hr int None
+
+# Detailed Partition Information
+Partition Value: [1994]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ totalSize 2
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (hr=1998)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (hr=1998)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+# col_name data_type comment
+
+key int None
+
+# Partition Information
+# col_name data_type comment
+
+hr int None
+
+# Detailed Partition Information
+Partition Value: [1998]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 2
+ rawDataSize 2
+ totalSize 4
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (hr=1996)
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted dummy partition (hr=1996)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+# col_name data_type comment
+
+key int None
+
+# Partition Information
+# col_name data_type comment
+
+hr int None
+
+# Detailed Partition Information
+Partition Value: [1996]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ totalSize 2
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl
+PREHOOK: Output: default@tbl
+POSTHOOK: query: drop table tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl
+POSTHOOK: Output: default@tbl
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: drop table dummy
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@dummy
+POSTHOOK: query: drop table dummy
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@dummy
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
Index: ql/src/test/queries/clientpositive/stats_counter_partitioned.q
===================================================================
--- ql/src/test/queries/clientpositive/stats_counter_partitioned.q (revision 0)
+++ ql/src/test/queries/clientpositive/stats_counter_partitioned.q (revision 0)
@@ -0,0 +1,45 @@
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+-- partitioned table analyze
+
+create table dummy (key string, value string) partitioned by (ds string, hr string);
+
+load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12');
+load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11');
+
+analyze table dummy partition (ds,hr) compute statistics;
+describe formatted dummy partition (ds='2008', hr='11');
+describe formatted dummy partition (ds='2008', hr='12');
+
+drop table dummy;
+
+-- static partitioned table on insert
+
+create table dummy (key string, value string) partitioned by (ds string, hr string);
+
+insert overwrite table dummy partition (ds='10',hr='11') select * from src;
+insert overwrite table dummy partition (ds='10',hr='12') select * from src;
+
+describe formatted dummy partition (ds='10', hr='11');
+describe formatted dummy partition (ds='10', hr='12');
+
+drop table dummy;
+
+-- dynamic partitioned table on insert
+
+create table dummy (key int) partitioned by (hr int);
+
+CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|';
+LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl;
+
+insert overwrite table dummy partition (hr) select * from tbl;
+
+describe formatted dummy partition (hr=1997);
+describe formatted dummy partition (hr=1994);
+describe formatted dummy partition (hr=1998);
+describe formatted dummy partition (hr=1996);
+
+drop table tbl;
+drop table dummy;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (revision 1548014)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (working copy)
@@ -35,6 +35,7 @@
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -95,6 +96,7 @@
StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
statsWork.setAggKey(op.getConf().getStatsAggPrefix());
+ statsWork.setSourceTask((MapRedTask)currTask);
statsWork.setStatsReliable(
parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
Task statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1548014)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -8508,11 +8508,11 @@
// Theoretically the key prefix could be any unique string shared
// between TableScanOperator (when publishing) and StatsTask (when aggregating).
// Here we use
- // table_name + partitionSec
+ // db_name.table_name + partitionSpec
// as the prefix for easy of read during explain and debugging.
// Currently, partition spec can only be static partition.
String k = tblName + Path.SEPARATOR;
- tsDesc.setStatsAggPrefix(k);
+ tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);
// set up WritenEntity for replication
outputs.add(new WriteEntity(tab, true));
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (revision 1548014)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (working copy)
@@ -52,6 +52,7 @@
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.stats.CounterStatsPublisher;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -974,15 +975,26 @@
// construct the key(fileID) to insert into the intermediate stats table
if (fspKey == "") {
+ if (statsPublisher instanceof CounterStatsPublisher) {
+ // key is of the form dbName.TblName/ or dbName.TblName/p1=v1/
+ key = Utilities.appendPathSeparator(conf.getTableInfo().getTableName() + Path.SEPARATOR + spSpec);
+ } else {
// for non-partitioned/static partitioned table, the key for temp storage is
// common key prefix + static partition spec + taskID
String keyPrefix = Utilities.getHashedStatsPrefix(
conf.getStatsAggPrefix() + spSpec, conf.getMaxStatsKeyPrefixLength());
key = keyPrefix + taskID;
+ }
} else {
- // for partitioned table, the key is
- // common key prefix + static partition spec + DynamicPartSpec + taskID
- key = createKeyForStatsPublisher(taskID, spSpec, fspKey);
+ if (statsPublisher instanceof CounterStatsPublisher) {
+ // key is of the form dbName.TblName/p1=v1/
+ key = Utilities.appendPathSeparator(Utilities.appendPathSeparator(
+ conf.getTableInfo().getTableName() + Path.SEPARATOR + spSpec) + fspKey);
+ } else {
+ // for partitioned table, the key is
+ // common key prefix + static partition spec + DynamicPartSpec + taskID
+ key = createKeyForStatsPublisher(taskID, spSpec, fspKey);
+ }
}
Map<String, String> statsToPublish = new HashMap<String, String>();
for (String statType : fspValue.stat.getStoredStats()) {
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (revision 1548014)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (working copy)
@@ -28,6 +28,8 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -35,6 +37,7 @@
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.stats.CounterStatsPublisher;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
@@ -290,17 +293,27 @@
for (String pspecs : stats.keySet()) {
statsToPublish.clear();
if (pspecs.isEmpty()) {
+ if (statsPublisher instanceof CounterStatsPublisher) {
+ // key is of the form dbName.TblName/
+ key = conf.getStatsAggPrefix();
+ } else {
// In case of a non-partitioned table, the key for temp storage is just
// "tableName + taskID"
String keyPrefix = Utilities.getHashedStatsPrefix(
- conf.getStatsAggPrefix(), conf.getMaxStatsKeyPrefixLength());
+ conf.getStatsAggPrefix(), conf.getMaxStatsKeyPrefixLength());
key = keyPrefix + taskID;
+ }
} else {
- // In case of a partition, the key for temp storage is
- // "tableName + partitionSpecs + taskID"
- String keyPrefix = Utilities.getHashedStatsPrefix(
- conf.getStatsAggPrefix() + pspecs, conf.getMaxStatsKeyPrefixLength());
- key = keyPrefix + taskID;
+ if (statsPublisher instanceof CounterStatsPublisher) {
+ // key is of the form dbName.tblName/p1=v1/
+ key = Utilities.appendPathSeparator(conf.getStatsAggPrefix() + pspecs);
+ } else {
+ // In case of a partition, the key for temp storage is
+ // "tableName + partitionSpecs + taskID"
+ String keyPrefix = Utilities.getHashedStatsPrefix(
+ conf.getStatsAggPrefix() + pspecs, conf.getMaxStatsKeyPrefixLength());
+ key = keyPrefix + taskID;
+ }
}
for(String statType : stats.get(pspecs).getStoredStats()) {
statsToPublish.put(statType, Long.toString(stats.get(pspecs).getStat(statType)));
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java (revision 1548014)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java (working copy)
@@ -29,8 +29,10 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.ErrorMsg;
@@ -173,9 +175,8 @@
try {
// Stats setup:
Warehouse wh = new Warehouse(conf);
-
+ String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
if (!this.getWork().getNoStatsAggregator()) {
- String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
StatsFactory factory = StatsFactory.newFactory(statsImplementationClass, conf);
if (factory != null && work.isNoScanAnalyzeCommand()){
// initialize stats publishing table for noscan which has only stats task
@@ -214,17 +215,24 @@
boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
int maxPrefixLength = HiveConf.getIntVar(conf,
HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH);
-
+ String tableFullName = table.getDbName() + "." + table.getTableName();
if (partitions == null) {
// non-partitioned tables:
if (!tableStatsExist && atomic) {
return 0;
}
- // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
+
if (statsAggregator != null) {
- String aggKey = Utilities.getHashedStatsPrefix(work.getAggKey(), maxPrefixLength);
+ String aggKey;
+ if (statsImplementationClass.equals("counter")) {
+ // Key is of the form dbName.tblName/
+ aggKey = tableFullName + Path.SEPARATOR;
+ } else {
+ // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
+ aggKey = Utilities.getHashedStatsPrefix(work.getAggKey(), maxPrefixLength);
+ }
updateStats(StatsSetupConst.statsRequireCompute, tblStats, statsAggregator, parameters,
- aggKey, atomic);
+ aggKey, atomic);
statsAggregator.cleanUp(aggKey);
}
// The collectable stats for the aggregator needs to be cleared.
@@ -244,9 +252,6 @@
}
parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);
tTable.setParameters(parameters);
-
- String tableFullName = table.getDbName() + "." + table.getTableName();
-
db.alterTable(tableFullName, new Table(tTable));
console.printInfo("Table " + tableFullName + " stats: [" + tblStats.toString() + ']');
@@ -277,12 +282,20 @@
// get the new partition stats
//
Statistics newPartStats = new Statistics();
+ String partitionID;
- // In that case of a partition, the key for stats temporary store is
- // "rootDir/[dynamic_partition_specs/]%"
- String partitionID = Utilities.getHashedStatsPrefix(
- work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength);
-
+ if (statsImplementationClass.equals("counter")) {
+ // the stats aggregation key is of the form dbName.tblName/p1=v1/p2=v2/
+ partitionID = Utilities.appendPathSeparator(tableFullName + Path.SEPARATOR +
+ Warehouse.makePartPath(partn.getSpec()));
+ // there is no need to aggregate stats in this case, but this path works as well.
+ // the same applies to the non-partitioned code path.
+ } else {
+ // In the case of a partition, the key for stats temporary store is
+ // "rootDir/[dynamic_partition_specs/]%"
+ partitionID = Utilities.getHashedStatsPrefix(
+ work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength);
+ }
LOG.info("Stats aggregator : " + partitionID);
if (statsAggregator != null) {
@@ -303,7 +316,6 @@
}
}
}
-
/**
* calculate fast statistics
*/
@@ -327,7 +339,6 @@
parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);
tPart.setParameters(parameters);
- String tableFullName = table.getDbName() + "." + table.getTableName();
db.alterPartition(tableFullName, new Partition(table, tPart));
console.printInfo("Partition " + tableFullName + partn.getSpec() +
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (revision 1548014)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (working copy)
@@ -2298,7 +2298,7 @@
return ret;
}
- private static String appendPathSeparator(String path) {
+ public static String appendPathSeparator(String path) {
if (!path.endsWith(Path.SEPARATOR)) {
path = path + Path.SEPARATOR;
}
Index: ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java (revision 1548014)
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java (working copy)
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.stats;
import java.io.IOException;
+import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -26,6 +27,7 @@
import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
@@ -46,7 +48,7 @@
counters = job.getCounters();
}
} catch (Exception e) {
- LOG.error("Failed to get Job instance for " + sourceTask.getJobID());
+ LOG.error("Failed to get Job instance for " + sourceTask.getJobID(),e);
}
return counters != null;
}
@@ -56,14 +58,10 @@
}
@Override
- public String aggregateStats(String keyPrefix, String statType) {
- long value = 0;
- for (String groupName : counters.getGroupNames()) {
- if (groupName.startsWith(keyPrefix)) {
- value += counters.getGroup(groupName).getCounter(statType);
- }
- }
- return String.valueOf(value);
+ public String aggregateStats(String counterGrpName, String statType) {
+ // For counter-based stats, aggregation is done by the JobTracker / MR AM itself,
+ // so there is no need to aggregate here; simply return the counter value for the requested stat.
+ return String.valueOf(counters.getGroup(counterGrpName).getCounter(statType));
}
@Override