diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index eb46e32..5f7839f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -246,7 +246,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa List emptyStats = Lists.newArrayList(); // add partition column stats - addParitionColumnStats(neededColumns, referencedColumns, schema, table, partList, + addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList, emptyStats); stats.addToColumnStats(emptyStats); @@ -260,7 +260,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa List columnStats = convertColStats(colStats, table.getTableName(), colToTabAlias); - addParitionColumnStats(neededColumns, referencedColumns, schema, table, partList, + addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList, columnStats); stats.addToColumnStats(columnStats); @@ -277,7 +277,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa return stats; } - private static void addParitionColumnStats(List neededColumns, + private static void addParitionColumnStats(HiveConf conf, List neededColumns, List referencedColumns, List schema, Table table, PrunedPartitionList partList, List colStats) throws HiveException { @@ -300,6 +300,8 @@ private static void addParitionColumnStats(List neededColumns, long numPartitions = getNDVPartitionColumn(partList.getPartitions(), ci.getInternalName()); partCS.setCountDistint(numPartitions); + partCS.setAvgColLen(StatsUtils.getAvgColLenOfVariableLengthTypes(conf, + ci.getObjectInspector(), partCS.getColumnType())); colStats.add(partCS); } } @@ -1000,7 +1002,13 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis if (encd.getIsPartitionColOrVirtualCol()) { - // vitual columns + ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName); + if (colStats != null) { + /* If statistics for the column already exist use it. */ + return colStats; + } + + // virtual columns colType = encd.getTypeInfo().getTypeName(); countDistincts = numRows; oi = encd.getWritableObjectInspector();