From f5b9a895c3dce0f2ba60b62b2ae9ba57368fea42 Mon Sep 17 00:00:00 2001 From: sunyerui Date: Mon, 28 Dec 2015 19:58:24 +0800 Subject: [PATCH] KYLIN-1186 Support precise Count Distinct using bitmap --- .../cube/model/validation/rule/FunctionRule.java | 25 ++++ .../test_kylin_cube_without_slr_desc.json | 2 +- ...test_kylin_cube_without_slr_left_join_desc.json | 2 +- metadata/pom.xml | 8 ++ .../apache/kylin/measure/MeasureTypeFactory.java | 2 + .../kylin/measure/bitmap/BitmapAggregator.java | 57 ++++++++ .../apache/kylin/measure/bitmap/BitmapCounter.java | 159 +++++++++++++++++++++ .../measure/bitmap/BitmapDistinctCountAggFunc.java | 52 +++++++ .../kylin/measure/bitmap/BitmapMeasureType.java | 105 ++++++++++++++ .../kylin/measure/bitmap/BitmapSerializer.java | 79 ++++++++++ .../apache/kylin/query/test/KylinQueryTest.java | 2 +- webapp/app/js/model/cubeConfig.js | 3 +- 12 files changed, 492 insertions(+), 4 deletions(-) create mode 100644 metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java create mode 100644 metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java create mode 100644 metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java create mode 100644 metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java create mode 100644 metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java diff --git a/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java b/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java index d7d9f13..11f82d4 100644 --- a/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java +++ b/cube/src/main/java/org/apache/kylin/cube/model/validation/rule/FunctionRule.java @@ -29,6 +29,7 @@ import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.validation.IValidatorRule; import org.apache.kylin.cube.model.validation.ResultLevel; import org.apache.kylin.cube.model.validation.ValidateContext; +import org.apache.kylin.measure.bitmap.BitmapMeasureType; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.ColumnDesc; import org.apache.kylin.metadata.model.FunctionDesc; @@ -98,6 +99,10 @@ public class FunctionRule implements IValidatorRule { context.addResult(ResultLevel.ERROR, ex.getMessage()); } + if (BitmapMeasureType.isBitmapCountDistinct(func)) { + validateBitmapColumnType(context, cube, func); + } + if (func.isCount()) countFuncs.add(func); } @@ -156,4 +161,24 @@ public class FunctionRule implements IValidatorRule { } } + + /** + * Check the bitmap distinct count column type is Int or not + * @param context + * @param cube + * @param funcDesc + */ + private void validateBitmapColumnType(ValidateContext context, CubeDesc cube, FunctionDesc funcDesc) { + TableDesc table = MetadataManager.getInstance(cube.getConfig()).getTableDesc(cube.getFactTable()); + String columnName = funcDesc.getParameter().getValue(); + for (ColumnDesc columnDesc : table.getColumns()) { + if (columnDesc.getName().equalsIgnoreCase(columnName)) { + if (!columnDesc.getType().isIntegerFamily()) { + context.addResult(ResultLevel.ERROR, "Bitmap only support Int but " + + columnDesc.getDatatype() + " of column " + columnName); + return; + } + } + } + } } diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json index bf29268..536a8ba 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json @@ -145,7 +145,7 @@ "type": "column", "value": "SELLER_ID" }, - "returntype": "hllc(10)" + "returntype": "bitmap" }, "dependent_measure_ref": null }, diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json index 2f314f0..6c244bc 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json @@ -145,7 +145,7 @@ "type": "column", "value": "SELLER_ID" }, - "returntype": "hllc(10)" + "returntype": "bitmap" }, "dependent_measure_ref": null }, diff --git a/metadata/pom.xml b/metadata/pom.xml index c7c849f..1d4a1a0 100644 --- a/metadata/pom.xml +++ b/metadata/pom.xml @@ -68,6 +68,14 @@ com.google.guava guava + + org.apache.calcite + calcite-core + + + org.roaringbitmap + RoaringBitmap + diff --git a/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java index 158e4d5..3561c18 100644 --- a/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java +++ b/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; import org.apache.kylin.measure.basic.BasicMeasureType; +import org.apache.kylin.measure.bitmap.BitmapMeasureType; import org.apache.kylin.measure.hllc.HLLCMeasureType; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.datatype.DataTypeSerializer; @@ -57,6 +58,7 @@ abstract public class MeasureTypeFactory { // two built-in advanced measure types factoryInsts.add(new HLLCMeasureType.Factory()); + factoryInsts.add(new BitmapMeasureType.Factory()); /* * Maybe do classpath search for more custom measure types? diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java new file mode 100644 index 0000000..13da5db --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.measure.MeasureAggregator; + +/** + * Created by sunyerui on 15/12/2. + */ +public class BitmapAggregator extends MeasureAggregator { + + private BitmapCounter sum = null; + + @Override + public void reset() { + sum = null; + } + + @Override + public void aggregate(BitmapCounter value) { + if (sum == null) { + sum = new BitmapCounter(value); + } else { + sum.merge(value); + } + } + + @Override + public BitmapCounter getState() { + return sum; + } + + @Override + public int getMemBytesEstimate() { + if (sum == null) { + return Integer.MIN_VALUE; + } else { + return 64 + sum.getMemBytes(); + } + } +} diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java new file mode 100644 index 0000000..a19fc3f --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.hadoop.io.DataInputByteBuffer; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Created by sunyerui on 15/12/1. + */ +public class BitmapCounter implements Comparable { + + private MutableRoaringBitmap bitmap = new MutableRoaringBitmap(); + + public BitmapCounter() {} + + public BitmapCounter(BitmapCounter another) { + merge(another); + } + + public void clear() { + bitmap.clear(); + } + + public void add(int value) { + bitmap.add(value); + } + + public void add(byte[] value) { + if (value == null || value.length == 0) { + return; + } + try { + int l = Integer.parseInt(new String(value)); + add(l); + } catch (NumberFormatException e) { + throw e; + } + } + + public void add(byte[] value, int offset, int length) { + if (value == null || length == 0) { + return; + } + try { + int l = Integer.parseInt(new String(value, offset, length)); + add(l); + } catch (NumberFormatException e) { + throw e; + } + } + + public void add(String value) { + if (value == null || value.isEmpty()) { + return; + } + try { + int l = Integer.parseInt(value); + add(l); + } catch (NumberFormatException e) { + throw e; + } + } + + public void add(long value) { + // TODO we need support long later + add((int) value); + } + + public void merge(BitmapCounter another) { + this.bitmap.or(another.bitmap); + } + + public long getCount() { + return this.bitmap.getCardinality(); + } + + public int getMemBytes() { + return this.bitmap.getSizeInBytes(); + } + + public void writeRegisters(ByteBuffer out) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + bitmap.runOptimize(); + bitmap.serialize(dos); + dos.close(); + ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray()); + out.put(bb); + } + + public void readRegisters(ByteBuffer in) throws IOException { + DataInputByteBuffer input = new DataInputByteBuffer(); + input.reset(new ByteBuffer[]{in}); + bitmap.deserialize(input); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + bitmap.hashCode(); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + BitmapCounter other = (BitmapCounter) obj; + return bitmap.equals(other.bitmap); + } + + @Override + public int compareTo(BitmapCounter o) { + if (o == null) + return 1; + + long e1 = this.getCount(); + long e2 = o.getCount(); + + if (e1 == e2) + return 0; + else if (e1 > e2) + return 1; + else + return -1; + } + + public int peekLength(ByteBuffer in) { + // TODO + return 0; + } +} diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java new file mode 100644 index 0000000..be6d28a --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.measure.bitmap; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Created by sunyerui on 15/12/22. + */ +public class BitmapDistinctCountAggFunc { + + private static final Logger logger = LoggerFactory.getLogger(BitmapDistinctCountAggFunc.class); + + public static BitmapCounter init() { + return null; + } + + public static BitmapCounter add(BitmapCounter counter, Object v) { + BitmapCounter c = (BitmapCounter) v; + if (counter == null) { + return new BitmapCounter(c); + } else { + counter.merge(c); + return counter; + } + } + + public static BitmapCounter merge(BitmapCounter counter0, Object counter1) { + return add(counter0, counter1); + } + + public static long result(BitmapCounter counter) { + return counter == null ? 0L : counter.getCount(); + } +} diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java new file mode 100644 index 0000000..f8b705e --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java @@ -0,0 +1,105 @@ +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.measure.MeasureAggregator; +import org.apache.kylin.measure.MeasureIngester; +import org.apache.kylin.measure.MeasureType; +import org.apache.kylin.measure.MeasureTypeFactory; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.TblColRef; + +import java.util.Map; + +/** + * Created by sunyerui on 15/12/10. + */ +public class BitmapMeasureType extends MeasureType { + public static final String FUNC_COUNT_DISTINCT = "COUNT_DISTINCT"; + public static final String DATATYPE_BITMAP = "bitmap"; + + public static class Factory extends MeasureTypeFactory { + + @Override + public MeasureType createMeasureType(String funcName, DataType dataType) { + return new BitmapMeasureType(funcName, dataType); + } + + @Override + public String getAggrFunctionName() { + return FUNC_COUNT_DISTINCT; + } + + @Override + public String getAggrDataTypeName() { + return DATATYPE_BITMAP; + } + + @Override + public Class> getAggrDataTypeSerializer() { + return BitmapSerializer.class; + } + } + + public DataType dataType; + + public BitmapMeasureType(String funcName, DataType dataType) { + this.dataType = dataType; + } + + @Override + public void validate(FunctionDesc functionDesc) throws IllegalArgumentException { + validate(functionDesc.getExpression(), functionDesc.getReturnDataType(), true); + } + + private void validate(String funcName, DataType dataType, boolean checkDataType) { + if (FUNC_COUNT_DISTINCT.equals(funcName) == false) + throw new IllegalArgumentException(); + + if (DATATYPE_BITMAP.equals(dataType.getName()) == false) + throw new IllegalArgumentException(); + } + + @Override + public boolean isMemoryHungry() { + return true; + } + + @Override + public MeasureIngester newIngester() { + return new MeasureIngester() { + BitmapCounter current = new BitmapCounter(); + + @Override + public BitmapCounter valueOf(String[] values, MeasureDesc measureDesc, Map> dictionaryMap) { + BitmapCounter bitmap = current; + bitmap.clear(); + for (String v : values) + bitmap.add(v); + return bitmap; + } + }; + } + + @Override + public MeasureAggregator newAggregator() { + return new BitmapAggregator(); + } + + @Override + public boolean needRewrite() { + return true; + } + + @Override + public Class getRewriteCalciteAggrFunctionClass() { + return BitmapDistinctCountAggFunc.class; + } + + public static boolean isBitmapCountDistinct(FunctionDesc func) { + return FUNC_COUNT_DISTINCT.equals(func.getExpression()) + && DATATYPE_BITMAP.equals(func.getReturnType()); + } +} diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java new file mode 100644 index 0000000..fbc46b5 --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Created by sunyerui on 15/12/1. + */ +public class BitmapSerializer extends DataTypeSerializer { + + private ThreadLocal current = new ThreadLocal<>(); + + public BitmapSerializer(DataType type) {} + + @Override + public void serialize(BitmapCounter value, ByteBuffer out) { + try { + value.writeRegisters(out); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private BitmapCounter current() { + BitmapCounter counter = current.get(); + if (counter == null) { + counter = new BitmapCounter(); + current.set(counter); + } + return counter; + } + + @Override + public BitmapCounter deserialize(ByteBuffer in) { + BitmapCounter counter = current(); + try { + counter.readRegisters(in); + } catch (IOException e) { + throw new RuntimeException(e); + } + return counter; + } + + @Override + public int peekLength(ByteBuffer in) { + return current().peekLength(in); + } + + @Override + public int maxLength() { + return current().getMemBytes(); + } + + @Override + public int getStorageBytesEstimate() { + return current().getMemBytes(); + } +} diff --git a/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java b/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java index f9e575b..3638f9b 100644 --- a/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java +++ b/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java @@ -196,7 +196,7 @@ public class KylinQueryTest extends KylinTestBase { @Test public void testDistinctCountQuery() throws Exception { - batchExecuteQuery("src/test/resources/query/sql_distinct"); + execAndCompQuery("src/test/resources/query/sql_distinct", null, true); } @Test diff --git a/webapp/app/js/model/cubeConfig.js b/webapp/app/js/model/cubeConfig.js index 8662680..e1f25b8 100644 --- a/webapp/app/js/model/cubeConfig.js +++ b/webapp/app/js/model/cubeConfig.js @@ -47,7 +47,8 @@ KylinApp.constant('cubeConfig', { {name: 'Error Rate < 4.88%', value: 'hllc12'}, {name: 'Error Rate < 2.44%', value: 'hllc14'}, {name: 'Error Rate < 1.72%', value: 'hllc15'}, - {name: 'Error Rate < 1.22%', value: 'hllc16'} + {name: 'Error Rate < 1.22%', value: 'hllc16'}, + {name: 'Precisely (Only for Integer Family column)', value: 'bitmap'} ], dftSelections: { measureExpression: 'SUM', -- 2.3.2 (Apple Git-55)