diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index 176a593..e2f696e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; @@ -305,8 +306,10 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName, List partVals = new ArrayList(); // Iterate over partition columns to figure out partition name for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) { - partVals.add(((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()). - getPrimitiveJavaObject(list.get(i)).toString()); + Object partVal = ((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()). + getPrimitiveJavaObject(list.get(i)); + partVals.add(partVal == null ? // could be null for default partition + this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString()); } partName = Warehouse.makePartName(partColSchema, partVals); } diff --git a/ql/src/test/queries/clientpositive/stats_only_null.q b/ql/src/test/queries/clientpositive/stats_only_null.q index b47bc48..a91022c 100644 --- a/ql/src/test/queries/clientpositive/stats_only_null.q +++ b/ql/src/test/queries/clientpositive/stats_only_null.q @@ -34,6 +34,17 @@ select count(*), count(a), count(b), count(c), count(d) from stats_null_part; select count(*), count(a), count(b), count(c), count(d) from stats_null; select count(*), count(a), count(b), count(c), count(d) from stats_null_part; + +drop table stats_null_part; +set hive.exec.dynamic.partition.mode=nonstrict; +CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE; + +insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null ; +analyze table stats_null_part compute statistics for columns; + +describe formatted stats_null_part.a partition(dt = 1); + +reset hive.exec.dynamic.partition.mode; drop table stats_null; drop table stats_null_part; drop table temps_null; diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index 063da37..c4728c9 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -334,6 +334,60 @@ POSTHOOK: query: select count(*), count(a), count(b), count(c), count(d) from st POSTHOOK: type: QUERY #### A masked pattern was here #### 10 8 8 10 10 +PREHOOK: query: drop table stats_null_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_null_part +PREHOOK: Output: default@stats_null_part +POSTHOOK: query: drop table stats_null_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_null_part +POSTHOOK: Output: default@stats_null_part +PREHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_null_part +POSTHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_null_part +PREHOOK: query: insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null +PREHOOK: type: QUERY +PREHOOK: Input: default@temps_null +PREHOOK: Output: default@stats_null_part +POSTHOOK: query: insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@temps_null +POSTHOOK: Output: default@stats_null_part@dt=1 +POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).a SIMPLE [(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ] +POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ] +POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ] +POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).a SIMPLE [(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ] +POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ] +POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ] +PREHOOK: query: analyze table stats_null_part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_null_part +PREHOOK: Input: default@stats_null_part@dt=1 +PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_null_part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_null_part +POSTHOOK: Input: default@stats_null_part@dt=1 +POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +PREHOOK: query: describe formatted stats_null_part.a partition(dt = 1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_null_part +POSTHOOK: query: describe formatted stats_null_part.a partition(dt = 1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_null_part +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +a double 1.0 1.0 1 1 from deserializer PREHOOK: query: drop table stats_null PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_null