diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 09c667e..22ae478 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -825,6 +825,7 @@ spark.query.files=add_part_multiple.q, \ sort_merge_join_desc_8.q, \ spark_test.q, \ stats_counter.q, \ + stats_counter_partitioned.q, \ stats_noscan_1.q, \ stats_noscan_2.q, \ stats_only_null.q, \ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index 30b7632..6ba7cdf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -262,14 +262,14 @@ private void printConfigInfo() throws IOException { List prefixs = new LinkedList(); StatsWork statsWork = statsTask.getWork(); String tablePrefix = getTablePrefix(statsWork); - List partitions = getPartitionsList(statsWork); + List> partitions = getPartitionsList(statsWork); int maxPrefixLength = StatsFactory.getMaxPrefixLength(conf); if (partitions == null) { prefixs.add(Utilities.getHashedStatsPrefix(tablePrefix, maxPrefixLength)); } else { - for (Partition partition : partitions) { - String prefixWithPartition = Utilities.join(tablePrefix, Warehouse.makePartPath(partition.getSpec())); + for (Map partition : partitions) { + String prefixWithPartition = Utilities.join(tablePrefix, Warehouse.makePartPath(partition)); prefixs.add(Utilities.getHashedStatsPrefix(prefixWithPartition, maxPrefixLength)); } } @@ -319,12 +319,12 @@ private static StatsTask getStatsTaskInChildTasks(Task r return null; } - private List getPartitionsList(StatsWork work) throws HiveException { + private List> getPartitionsList(StatsWork work) throws HiveException { if (work.getLoadFileDesc() != null) { return null; //we are in CTAS, so we know there are no partitions } Table table; - List list = new ArrayList(); + List> list = new ArrayList>(); if (work.getTableSpecs() != null) { @@ -337,8 +337,8 @@ private static StatsTask getStatsTaskInChildTasks(Task r // get all partitions that matches with the partition spec List partitions = tblSpec.partitions; if (partitions != null) { - for (Partition partn : partitions) { - list.add(partn); + for (Partition partition : partitions) { + list.add(partition.getSpec()); } } } else if (work.getLoadTableDesc() != null) { @@ -353,8 +353,7 @@ private static StatsTask getStatsTaskInChildTasks(Task r if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions // we could not get dynamic partition information before SparkTask execution. } else { // static partition - Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); - list.add(partn); + list.add(tbd.getPartitionSpec()); } } return list; diff --git ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out new file mode 100644 index 0000000..4b84eca --- /dev/null +++ ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out @@ -0,0 +1,481 @@ +PREHOOK: query: -- partitioned table analyze + +create table dummy (key string, value string) partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: -- partitioned table analyze + +create table dummy (key string, value string) partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dummy +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dummy +POSTHOOK: Output: default@dummy@ds=2008/hr=12 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dummy +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dummy +POSTHOOK: Output: default@dummy@ds=2008/hr=11 +PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Input: default@dummy@ds=2008/hr=11 +PREHOOK: Input: default@dummy@ds=2008/hr=12 +PREHOOK: Output: default@dummy +PREHOOK: Output: default@dummy@ds=2008/hr=11 +PREHOOK: Output: default@dummy@ds=2008/hr=12 +POSTHOOK: query: analyze table dummy partition (ds,hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Input: default@dummy@ds=2008/hr=11 +POSTHOOK: Input: default@dummy@ds=2008/hr=12 +POSTHOOK: Output: default@dummy +POSTHOOK: Output: default@dummy@ds=2008/hr=11 +POSTHOOK: Output: default@dummy@ds=2008/hr=12 +PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008, 11] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008, 12] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table dummy +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dummy +PREHOOK: Output: default@dummy +POSTHOOK: query: drop table dummy +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@dummy +PREHOOK: query: -- static partitioned table on insert + +create table dummy (key string, value string) partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: -- static partitioned table on insert + +create table dummy (key string, value string) partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dummy@ds=10/hr=11 +POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dummy@ds=10/hr=11 +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dummy@ds=10/hr=12 +POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dummy@ds=10/hr=12 +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted dummy partition (ds='10', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (ds='10', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [10, 11] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (ds='10', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (ds='10', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [10, 12] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table dummy +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dummy +PREHOOK: Output: default@dummy +POSTHOOK: query: drop table dummy +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@dummy +PREHOOK: query: -- dynamic partitioned table on insert + +create table dummy (key int) partitioned by (hr int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: -- dynamic partitioned table on insert + +create table dummy (key int) partitioned by (hr int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tbl +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tbl +PREHOOK: query: insert overwrite table dummy partition (hr) select * from tbl +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +PREHOOK: Output: default@dummy +POSTHOOK: query: insert overwrite table dummy partition (hr) select * from tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@dummy@hr=1994 +POSTHOOK: Output: default@dummy@hr=1996 +POSTHOOK: Output: default@dummy@hr=1997 +POSTHOOK: Output: default@dummy@hr=1998 +POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: describe formatted dummy partition (hr=1997) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (hr=1997) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key int + +# Partition Information +# col_name data_type comment + +hr int + +# Detailed Partition Information +Partition Value: [1997] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 12 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (hr=1994) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (hr=1994) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key int + +# Partition Information +# col_name data_type comment + +hr int + +# Detailed Partition Information +Partition Value: [1994] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 2 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (hr=1998) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (hr=1998) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key int + +# Partition Information +# col_name data_type comment + +hr int + +# Detailed Partition Information +Partition Value: [1998] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 4 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted dummy partition (hr=1996) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dummy +POSTHOOK: query: describe formatted dummy partition (hr=1996) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dummy +# col_name data_type comment + +key int + +# Partition Information +# col_name data_type comment + +hr int + +# Detailed Partition Information +Partition Value: [1996] +Database: default +Table: dummy +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 2 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl +PREHOOK: Output: default@tbl +POSTHOOK: query: drop table tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@tbl +PREHOOK: query: drop table dummy +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dummy +PREHOOK: Output: default@dummy +POSTHOOK: query: drop table dummy +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@dummy