diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 5a7c1afb34..f5c9cbd26e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -2575,7 +2575,7 @@ private void updateColStats(HiveConf conf, Statistics stats, long leftUnmatchedR
         }
       }
       if (neededColumns.size() != 0) {
-        int restColumnsDefaultSize =
+        long restColumnsDefaultSize =
             StatsUtils.estimateRowSizeFromSchema(conf, jop.getSchema().getSignature(), neededColumns);
         newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
       }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 3f71ee40de..8084dcd62a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -649,37 +649,41 @@ private static Range getRangePartitionColumn(PartitionIterable partitions, Strin
     return range;
   }
 
-  public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema) {
-    List<String> neededColumns = new ArrayList<>();
+  private static long getAvgColSize(final ColumnInfo columnInfo, HiveConf conf) {
+    ObjectInspector oi = columnInfo.getObjectInspector();
+    String colTypeLowerCase = columnInfo.getTypeName().toLowerCase();
+    if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+        || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
+      return getAvgColLenOf(conf, oi, colTypeLowerCase);
+    } else {
+      return getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
+    }
+  }
+
+  public static long estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema) {
+    long avgRowSize = 0;
     for (ColumnInfo ci : schema) {
-      neededColumns.add(ci.getInternalName());
+      avgRowSize += getAvgColSize(ci, conf);
     }
-    return estimateRowSizeFromSchema(conf, schema, neededColumns);
+    return avgRowSize;
   }
 
-  public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema,
+  public static long estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema,
       List<String> neededColumns) {
-    int avgRowSize = 0;
+    long avgRowSize = 0;
     for (String neededCol : neededColumns) {
       ColumnInfo ci = getColumnInfoForColumn(neededCol, schema);
       if (ci == null) {
         // No need to collect statistics of index columns
         continue;
       }
-      ObjectInspector oi = ci.getObjectInspector();
-      String colTypeLowerCase = ci.getTypeName().toLowerCase();
-      if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
-          || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
-          || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
-          || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
-          || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
-          || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
-          || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
-          || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
-        avgRowSize += getAvgColLenOf(conf, oi, colTypeLowerCase);
-      } else {
-        avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
-      }
+      avgRowSize += getAvgColSize(ci, conf);
     }
     return avgRowSize;
   }