diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java
index 96ca5ec..9fb6782 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
 
+import java.util.List;
+
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdParallelism;
@@ -62,7 +64,35 @@ public Integer splitCount(HiveJoin join) {
 
+  /**
+   * Estimates the number of splits for a table scan from its estimated size:
+   * the sum of the average column sizes times the row count, divided by
+   * {@code maxSplitSize}.
+   *
+   * @param scan the table scan to estimate
+   * @return the estimated split count, or {@code null} when the column sizes
+   *         (as a whole or for any single column) or the row count are unknown
+   */
   public Integer splitCount(HiveTableScan scan) {
     RelOptHiveTable table = (RelOptHiveTable) scan.getTable();
-    return table.getHiveTableMD().getNumBuckets();
+    final List<Double> averageColSizes = RelMetadataQuery.getAverageColumnSizes(scan);
+    final Double rowCount = table.getRowCount();
+    // Without column sizes or a non-negative row count the size cannot be estimated.
+    if (averageColSizes == null || rowCount == null || rowCount < 0) {
+      return null;
+    }
+
+    // Average row size is the sum of the per-column averages.
+    double averageRowSize = 0d;
+    for (Double colSize : averageColSizes) {
+      if (colSize == null) {
+        // Calcite allows individual entries to be null; treat the size as unknown.
+        return null;
+      }
+      averageRowSize += colSize;
+    }
+
+    // The cast truncates toward zero, so a table smaller than one split yields 0.
+    final double totalSize = averageRowSize * rowCount;
+    return (int) (totalSize / maxSplitSize);
   }
 
   public Integer splitCount(RelNode rel) {