From bd22e607b7b8774437dde0dc11a9a71d40a09842 Mon Sep 17 00:00:00 2001 From: kangkaisen Date: Mon, 16 May 2016 14:49:26 +0800 Subject: [PATCH] KYLIN-1694 make multiply coefficient configurable when estimating cuboid size --- .../main/java/org/apache/kylin/common/KylinConfigBase.java | 8 ++++++++ .../org/apache/kylin/engine/mr/common/CubeStatsReader.java | 11 +++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 2e17c94..26849f9 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -374,6 +374,14 @@ abstract public class KylinConfigBase implements Serializable { return Long.parseLong(getOptional("kylin.job.step.timeout", String.valueOf(2 * 60 * 60))); } + public double getJobCuboidSizeRatio(){ + return Double.parseDouble(getOptional("kylin.job.cuboid.size.ratio", "0.25")); + } + + public double getJobCuboidSizeMemHungryRatio(){ + return Double.parseDouble(getOptional("kylin.job.cuboid.size.memhungry.ratio", "0.05")); + } + public String getCubeAlgorithm() { return getOptional("kylin.cube.algorithm", "auto"); } diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java index 940172b..8973a99 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java @@ -180,6 +180,7 @@ public class CubeStatsReader { private static double estimateCuboidStorageSize(CubeSegment cubeSegment, long cuboidId, long rowCount, long baseCuboidId, List rowKeyColumnLength) { int bytesLength = cubeSegment.getRowKeyPreambleSize(); + KylinConfig kylinConf = cubeSegment.getConfig(); long mask = Long.highestOneBit(baseCuboidId); long parentCuboidIdActualLength = Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId); @@ -204,11 +205,13 @@ public class CubeStatsReader { double ret = 1.0 * bytesLength * rowCount / (1024L * 1024L); if (isMemoryHungry) { - logger.info("Cube is memory hungry, storage size estimation multiply 0.05"); - ret *= 0.05; + double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeMemHungryRatio(); + logger.info("Cube is memory hungry, storage size estimation multiply " + cuboidSizeMemHungryRatio); + ret *= cuboidSizeMemHungryRatio; } else { - logger.info("Cube is not memory hungry, storage size estimation multiply 0.25"); - ret *= 0.25; + double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio(); + logger.info("Cube is not memory hungry, storage size estimation multiply " + cuboidSizeRatio); + ret *= cuboidSizeRatio; } logger.info("Cuboid " + cuboidId + " has " + rowCount + " rows, each row size is " + bytesLength + " bytes." + " Total size is " + ret + "M."); return ret; -- 2.5.4 (Apple Git-61)