From 7586ebc50101b2b8c43d6b4edc1b4e346b5e5664 Mon Sep 17 00:00:00 2001 From: sunyerui Date: Wed, 20 Jan 2016 18:28:59 +0800 Subject: [PATCH] KYLIN-1186 Support precise Count Distinct using bitmap --- .../kylin/metadata/measure/MeasureCodecTest.java | 10 +- core-metadata/pom.xml | 10 ++ .../apache/kylin/measure/MeasureTypeFactory.java | 2 + .../kylin/measure/bitmap/BitmapAggregator.java | 57 +++++++ .../apache/kylin/measure/bitmap/BitmapCounter.java | 173 +++++++++++++++++++++ .../measure/bitmap/BitmapDistinctCountAggFunc.java | 52 +++++++ .../kylin/measure/bitmap/BitmapMeasureType.java | 109 +++++++++++++ .../kylin/measure/bitmap/BitmapSerializer.java | 81 ++++++++++ .../kylin/measure/bitmap/BitmapAggregatorTest.java | 58 +++++++ .../kylin/measure/bitmap/BitmapCounterTest.java | 73 +++++++++ .../kylin/measure/bitmap/BitmapSerializerTest.java | 57 +++++++ .../test_kylin_cube_without_slr_desc.json | 13 +- ...test_kylin_cube_without_slr_left_join_desc.json | 13 +- pom.xml | 17 +- .../org/apache/kylin/query/test/ITIIQueryTest.java | 6 + .../apache/kylin/query/test/ITKylinQueryTest.java | 5 + .../query/sql_distinct_precisely/query00.sql | 24 +++ .../query/sql_distinct_precisely/query01.sql | 25 +++ .../query/sql_distinct_precisely/query02.sql | 26 ++++ .../query/sql_distinct_precisely/query03.sql | 33 ++++ .../query/sql_distinct_precisely/query04.sql | 34 ++++ .../query/sql_distinct_precisely/query05.sql | 25 +++ .../query/sql_distinct_precisely/query06.sql | 26 ++++ .../query/sql_distinct_precisely/query07.sql | 24 +++ storage-hbase/pom.xml | 1 + webapp/app/js/model/cubeConfig.js | 3 +- 26 files changed, 947 insertions(+), 10 deletions(-) create mode 100644 core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java create mode 100644 core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java create mode 100644 core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java create mode 100644 
core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java create mode 100644 core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java create mode 100644 core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java create mode 100644 core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java create mode 100644 core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query00.sql create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query01.sql create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query02.sql create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query03.sql create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query04.sql create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query05.sql create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query06.sql create mode 100644 query/src/test/resources/query/sql_distinct_precisely/query07.sql diff --git a/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java b/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java index 02dca72..c64280e 100644 --- a/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java +++ b/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java @@ -26,6 +26,7 @@ import java.nio.ByteBuffer; import org.apache.kylin.common.hll.HyperLogLogPlusCounter; import org.apache.kylin.cube.kv.RowConstants; import org.apache.kylin.measure.MeasureCodec; +import org.apache.kylin.measure.bitmap.BitmapCounter; import org.apache.kylin.metadata.datatype.DoubleMutable; import org.apache.kylin.metadata.datatype.LongMutable; import org.apache.kylin.metadata.model.FunctionDesc; @@ -39,7 +40,8 @@ public 
class MeasureCodecTest { @Test public void basicTest() { - MeasureDesc descs[] = new MeasureDesc[] { measure("double"), measure("long"), measure("decimal"), measure("HLLC16") }; + MeasureDesc descs[] = new MeasureDesc[] { measure("double"), measure("long"), measure + ("decimal"), measure("HLLC16"), measure("bitmap") }; MeasureCodec codec = new MeasureCodec(descs); DoubleMutable d = new DoubleMutable(1.0); @@ -48,7 +50,11 @@ public class MeasureCodecTest { HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(16); hllc.add("1234567"); hllc.add("abcdefg"); - Object values[] = new Object[] { d, l, b, hllc }; + BitmapCounter bitmap = new BitmapCounter(); + bitmap.add(123); + bitmap.add(45678); + bitmap.add(Long.MAX_VALUE-10); + Object values[] = new Object[] { d, l, b, hllc, bitmap }; ByteBuffer buf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE); diff --git a/core-metadata/pom.xml b/core-metadata/pom.xml index 3f1576a..645a739 100644 --- a/core-metadata/pom.xml +++ b/core-metadata/pom.xml @@ -47,6 +47,16 @@ ${ehcache.version} + + org.roaringbitmap + RoaringBitmap + + + + org.apache.hadoop + hadoop-common + + org.apache.kylin diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java index 5c2e6ed..5e045e6 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; import org.apache.kylin.measure.basic.BasicMeasureType; +import org.apache.kylin.measure.bitmap.BitmapMeasureType; import org.apache.kylin.measure.hllc.HLLCMeasureType; import org.apache.kylin.measure.topn.TopNMeasureType; import org.apache.kylin.metadata.datatype.DataType; @@ -91,6 +92,7 @@ abstract public class MeasureTypeFactory { // two built-in advanced measure types factoryInsts.add(new 
HLLCMeasureType.Factory()); + factoryInsts.add(new BitmapMeasureType.Factory()); factoryInsts.add(new TopNMeasureType.Factory()); /* diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java new file mode 100644 index 0000000..be72090 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.measure.MeasureAggregator; + +/** + * Created by sunyerui on 15/12/2. 
+ */
+public class BitmapAggregator extends MeasureAggregator<BitmapCounter> {
+    // Running union of every counter aggregated since construction or the last reset(); null means "nothing yet".
+    private BitmapCounter sum = null;
+
+    @Override
+    public void reset() {
+        sum = null;
+    }
+    /** Unions {@code value} into the running state; the first value is copied, so the caller's counter is never mutated. */
+    @Override
+    public void aggregate(BitmapCounter value) {
+        if (sum == null) {
+            sum = new BitmapCounter(value);
+        } else {
+            sum.merge(value);
+        }
+    }
+    /** Returns the running union, or {@code null} if nothing has been aggregated since the last reset. */
+    @Override
+    public BitmapCounter getState() {
+        return sum;
+    }
+    /** Returns the bitmap's size in bytes, or {@code Integer.MIN_VALUE} as a sentinel when no state exists. */
+    @Override
+    public int getMemBytesEstimate() {
+        if (sum == null) {
+            return Integer.MIN_VALUE;
+        } else {
+            return sum.getMemBytes();
+        }
+    }
+}
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java new file mode 100644 index 0000000..910b931 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package org.apache.kylin.measure.bitmap;
+
+import org.apache.hadoop.io.DataInputByteBuffer;
+import org.roaringbitmap.RoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Created by sunyerui on 15/12/1.
+ */
+public class BitmapCounter implements Comparable<BitmapCounter> {
+
+    private MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+
+    public BitmapCounter() {
+    }
+
+    public BitmapCounter(BitmapCounter another) {
+        merge(another);
+    }
+
+    public void clear() {
+        bitmap.clear();
+    }
+
+    public void add(int value) {
+        bitmap.add(value);
+    }
+
+    /**
+     * Decodes {@code value} as UTF-8 decimal text and adds the resulting int; null or empty input is ignored.
+     * @throws NumberFormatException if the text is not a valid int
+     */
+    public void add(byte[] value) {
+        if (value == null || value.length == 0) {
+            return;
+        }
+        int parsed = Integer.parseInt(new String(value, StandardCharsets.UTF_8));
+        add(parsed);
+    }
+
+    /**
+     * Same as {@link #add(byte[])}, but decodes only {@code length} bytes starting at {@code offset}.
+     */
+    public void add(byte[] value, int offset, int length) {
+        if (value == null || length == 0) {
+            return;
+        }
+        int parsed = Integer.parseInt(new String(value, offset, length, StandardCharsets.UTF_8));
+        add(parsed);
+    }
+
+    /**
+     * Parses {@code value} as a decimal int and adds it; null or empty input is ignored.
+     * @throws NumberFormatException if the text is not a valid int
+     */
+    public void add(String value) {
+        if (value == null || value.isEmpty()) {
+            return;
+        }
+        int parsed = Integer.parseInt(value);
+        add(parsed);
+    }
+
+    public void add(long value) {
+        // TODO we need support long later; until then the value is narrowed to int, discarding the high 32 bits
+        add((int) value);
+    }
+    /** Unions the values of {@code another} into this counter. */
+    public void merge(BitmapCounter another) {
+        this.bitmap.or(another.bitmap);
+    }
+
+    public long getCount() {
+        return this.bitmap.getCardinality();
+    }
+
+    public int getMemBytes() {
+        return this.bitmap.getSizeInBytes();
+    }
+    /** Run-optimizes the bitmap, then appends its serialized form to {@code out} with no length prefix. */
+    public void writeRegisters(ByteBuffer out) throws IOException {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream dos = new DataOutputStream(bos);
+        bitmap.runOptimize();
+        bitmap.serialize(dos);
+        dos.close();
+        ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray());
+        out.put(bb);
+    }
+    /** Loads the bitmap serialized at the current position of {@code in} into this counter. */
+    public void readRegisters(ByteBuffer in) throws IOException {
+        DataInputByteBuffer input = new DataInputByteBuffer();
+        input.reset(new ByteBuffer[]{in});
+        bitmap.deserialize(input);
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + bitmap.hashCode();
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        BitmapCounter other = (BitmapCounter) obj;
+        return bitmap.equals(other.bitmap);
+    }
+    /** Orders counters by cardinality only, so two unequal counters of the same size compare as 0. */
+    @Override
+    public int compareTo(BitmapCounter o) {
+        if (o == null)
+            return 1;
+
+        long e1 = this.getCount();
+        long e2 = o.getCount();
+
+        if (e1 == e2)
+            return 0;
+        else if (e1 > e2)
+            return 1;
+        else
+            return -1;
+    }
+    /** Returns the byte length of the bitmap serialized at {@code in}'s position; throws IllegalStateException if unparsable. */
+    public int peekLength(ByteBuffer in) {
+        int mark = in.position();
+        DataInputByteBuffer input = new DataInputByteBuffer();
+        input.reset(new ByteBuffer[]{in});
+        // writeRegisters() stores no length prefix, so the length is found by parsing into a throwaway bitmap.
+        RoaringBitmap scratch = new RoaringBitmap();
+        try {
+            scratch.deserialize(input);
+            return in.position() - mark;
+        } catch (IOException e) {
+            throw new IllegalStateException("Failed to peek length of serialized bitmap", e);
+        } finally {
+            // Peeking must not consume the buffer, whether or not parsing succeeded.
+            in.position(mark);
+        }
+    }
+}
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java new file mode 100644 index 0000000..be6d28a --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+*/
+
+package org.apache.kylin.measure.bitmap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Created by sunyerui on 15/12/22.
+ */
+public class BitmapDistinctCountAggFunc {
+
+    private static final Logger logger = LoggerFactory.getLogger(BitmapDistinctCountAggFunc.class);
+    /** Returns the initial accumulator: {@code null}, meaning no counter has been merged yet. */
+    public static BitmapCounter init() {
+        return null;
+    }
+    /** Unions {@code v} (cast to BitmapCounter) into {@code counter}; a null accumulator is replaced by a copy of {@code v}. */
+    public static BitmapCounter add(BitmapCounter counter, Object v) {
+        BitmapCounter c = (BitmapCounter) v;
+        if (counter == null) {
+            return new BitmapCounter(c);
+        } else {
+            counter.merge(c);
+            return counter;
+        }
+    }
+    /** Combines two accumulators; simply delegates to {@link #add(BitmapCounter, Object)}. */
+    public static BitmapCounter merge(BitmapCounter counter0, Object counter1) {
+        return add(counter0, counter1);
+    }
+    /** Returns the number of distinct values accumulated, or 0 when the accumulator is still {@code null}. */
+    public static long result(BitmapCounter counter) {
+        return counter == null ? 0L : counter.getCount();
+    }
+}
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java new file mode 100644 index 0000000..df8e765 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java @@ -0,0 +1,109 @@ +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.measure.MeasureAggregator; +import org.apache.kylin.measure.MeasureIngester; +import org.apache.kylin.measure.MeasureType; +import org.apache.kylin.measure.MeasureTypeFactory; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.TblColRef; + +import java.util.List; +import java.util.Map; + +/** + * Created by sunyerui on 15/12/10.
+ */
+public class BitmapMeasureType extends MeasureType<BitmapCounter> {
+    public static final String FUNC_COUNT_DISTINCT = "COUNT_DISTINCT";
+    public static final String DATATYPE_BITMAP = "bitmap";
+
+    public static class Factory extends MeasureTypeFactory<BitmapCounter> {
+
+        @Override
+        public MeasureType<BitmapCounter> createMeasureType(String funcName, DataType dataType) {
+            return new BitmapMeasureType(funcName, dataType);
+        }
+
+        @Override
+        public String getAggrFunctionName() {
+            return FUNC_COUNT_DISTINCT;
+        }
+
+        @Override
+        public String getAggrDataTypeName() {
+            return DATATYPE_BITMAP;
+        }
+
+        @Override
+        public Class<? extends DataTypeSerializer<BitmapCounter>> getAggrDataTypeSerializer() {
+            return BitmapSerializer.class;
+        }
+    }
+
+    public DataType dataType;
+
+    public BitmapMeasureType(String funcName, DataType dataType) {
+        this.dataType = dataType;
+    }
+    /** Accepts only COUNT_DISTINCT returning "bitmap" over exactly one integer-family column; otherwise throws. */
+    @Override
+    public void validate(FunctionDesc functionDesc) throws IllegalArgumentException {
+        if (!FUNC_COUNT_DISTINCT.equals(functionDesc.getExpression()))
+            throw new IllegalArgumentException("BitmapMeasureType func is not " + FUNC_COUNT_DISTINCT + " but " + functionDesc.getExpression());
+
+        if (!DATATYPE_BITMAP.equals(functionDesc.getReturnDataType().getName()))
+            throw new IllegalArgumentException("BitmapMeasureType datatype is not " + DATATYPE_BITMAP + " but " + functionDesc.getReturnDataType().getName());
+
+        List<TblColRef> colRefs = functionDesc.getParameter().getColRefs();
+        if (colRefs.size() != 1) {
+            throw new IllegalArgumentException("BitmapMeasureType col parameters count is not 1 but " + colRefs.size());
+        }
+
+        TblColRef colRef = colRefs.get(0);
+        DataType type = colRef.getType();
+        if (!type.isIntegerFamily()) {
+            throw new IllegalArgumentException("BitmapMeasureType col type is not IntegerFamily but " + type.getName() + " of column " + colRef.getCanonicalName());
+        }
+    }
+
+    @Override
+    public boolean isMemoryHungry() {
+        return true;
+    }
+    /** Creates an ingester that turns the raw string values of one row into a BitmapCounter. */
+    @Override
+    public MeasureIngester<BitmapCounter> newIngester() {
+        return new MeasureIngester<BitmapCounter>() {
+            BitmapCounter current = new BitmapCounter();
+            // The single 'current' instance is cleared and returned on every call, so each result replaces the last.
+            @Override
+            public BitmapCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
+                BitmapCounter bitmap = current;
+                bitmap.clear();
+                for (String v : values)
+                    bitmap.add(v);
+                return bitmap;
+            }
+        };
+    }
+
+    @Override
+    public MeasureAggregator<BitmapCounter> newAggregator() {
+        return new BitmapAggregator();
+    }
+
+    @Override
+    public boolean needRewrite() {
+        return true;
+    }
+
+    @Override
+    public Class<?> getRewriteCalciteAggrFunctionClass() {
+        return BitmapDistinctCountAggFunc.class;
+    }
+
+}
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java new file mode 100644 index 0000000..812eb4d --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Created by sunyerui on 15/12/1.
+ */
+public class BitmapSerializer extends DataTypeSerializer<BitmapCounter> {
+    // Per-thread scratch counter: deserialize() reads into it and returns it, so the instance is reused across calls.
+    private ThreadLocal<BitmapCounter> current = new ThreadLocal<>();
+
+    public BitmapSerializer(DataType type) {
+    }
+
+    @Override
+    public void serialize(BitmapCounter value, ByteBuffer out) {
+        try {
+            value.writeRegisters(out);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+    /** Returns the calling thread's scratch counter, creating it on first use. */
+    private BitmapCounter current() {
+        BitmapCounter counter = current.get();
+        if (counter == null) {
+            counter = new BitmapCounter();
+            current.set(counter);
+        }
+        return counter;
+    }
+    /** Reads a bitmap from {@code in} into the calling thread's scratch counter and returns that shared instance. */
+    @Override
+    public BitmapCounter deserialize(ByteBuffer in) {
+        BitmapCounter counter = current();
+        try {
+            counter.readRegisters(in);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        return counter;
+    }
+
+    @Override
+    public int peekLength(ByteBuffer in) {
+        return current().peekLength(in);
+    }
+
+    @Override
+    public int maxLength() {
+        // the bitmap is non-fixed length, and we just assume 32MB here, maybe change it later
+        return 32 * 1024 * 1024;
+    }
+    /** Reports the in-memory size of whatever the calling thread's scratch counter currently holds. */
+    @Override
+    public int getStorageBytesEstimate() {
+        return current().getMemBytes();
+    }
+}
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java new file mode 100644 index 0000000..31772f9 --- /dev/null +++ b/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package org.apache.kylin.measure.bitmap;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * Created by sunyerui on 15/12/31.
+ */
+public class BitmapAggregatorTest {
+
+    @Test
+    public void testAggregator() {
+        BitmapCounter counter = new BitmapCounter();
+        counter.add(1);
+        counter.add(3333);
+        counter.add("123".getBytes());
+        counter.add((long)123); // same value as the "123" bytes above, so the count stays at 3
+        assertEquals(3, counter.getCount());
+
+        BitmapCounter counter2 = new BitmapCounter();
+        counter2.add("23456");
+        counter2.add(12273456);
+        counter2.add("4258");
+        counter2.add(123);
+        assertEquals(4, counter2.getCount());
+
+        BitmapAggregator aggregator = new BitmapAggregator();
+        assertNull(aggregator.getState()); // a fresh aggregator has no state
+        assertEquals(Integer.MIN_VALUE, aggregator.getMemBytesEstimate()); // sentinel reported while the state is null
+
+        aggregator.aggregate(counter);
+        aggregator.aggregate(counter2);
+        assertEquals(6, aggregator.getState().getCount()); // 123 is in both counters: 3 + 4 - 1 distinct values
+        aggregator.reset();
+        assertNull(aggregator.getState());
+    }
+
+}
\ No newline at end of file
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java new file mode 100644 index 0000000..5caf5b1 --- /dev/null +++ b/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package org.apache.kylin.measure.bitmap;
+
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Created by sunyerui on 15/12/31.
+ */
+public class BitmapCounterTest {
+
+    @Test
+    public void testAddAndMergeValues() {
+        BitmapCounter counter = new BitmapCounter();
+        counter.add(1);
+        counter.add(3333);
+        counter.add("123".getBytes());
+        counter.add((long)123); // same value as the "123" bytes above, so the count stays at 3
+        assertEquals(3, counter.getCount());
+
+        BitmapCounter counter2 = new BitmapCounter();
+        counter2.add("23456");
+        counter2.add(12273456);
+        counter2.add("4258");
+        counter2.add(123);
+        assertEquals(4, counter2.getCount());
+
+        counter.merge(counter2);
+        assertEquals(6, counter.getCount()); // 123 is in both counters: 3 + 4 - 1 distinct values
+        System.out.print("counter size: " + counter.getMemBytes() + ", counter2 size: " + counter2.getMemBytes());
+    }
+
+    @Test
+    public void testSerDeCounter() throws IOException {
+        BitmapCounter counter = new BitmapCounter();
+        for (int i = 1; i < 1000; i++) { // adds 1..999, i.e. 999 distinct values
+            counter.add(i);
+        }
+        ByteBuffer buffer = ByteBuffer.allocate(10 * 1024 * 1024);
+        counter.writeRegisters(buffer);
+        int len = buffer.position(); // bytes written, since the buffer started at position 0
+
+        buffer.position(0);
+        assertEquals(len, counter.peekLength(buffer));
+        assertEquals(0, buffer.position()); // peekLength must leave the buffer position untouched
+
+        BitmapCounter counter2 = new BitmapCounter();
+        counter2.readRegisters(buffer);
+        assertEquals(999, counter2.getCount());
+    }
+
+}
\ No newline at end of file
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java new file mode 100644 index 0000000..bcb1406 --- /dev/null +++ b/core-metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.metadata.datatype.DataType; +import org.junit.Test; + +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; + +/** + * Created by sunyerui on 15/12/31.
+ */
+public class BitmapSerializerTest {
+
+    @Test
+    public void testSerDeCounter() {
+        BitmapCounter counter = new BitmapCounter();
+        counter.add(1);
+        counter.add(3333);
+        counter.add("123".getBytes());
+        counter.add((long)123); // same value as the "123" bytes above, so the count stays at 3
+        assertEquals(3, counter.getCount());
+
+        ByteBuffer buffer = ByteBuffer.allocate(10 * 1024 * 1024);
+        BitmapSerializer serializer = new BitmapSerializer(DataType.ANY);
+        serializer.serialize(counter, buffer);
+        int len = buffer.position(); // bytes written, since the buffer started at position 0
+
+        buffer.position(0);
+        BitmapSerializer deSerializer = new BitmapSerializer(DataType.ANY); // separate instance from the one that wrote
+        BitmapCounter counter2 = deSerializer.deserialize(buffer);
+        assertEquals(3, counter2.getCount());
+
+        buffer.position(0);
+        assertEquals(len, deSerializer.peekLength(buffer));
+        assertEquals(32 * 1024 * 1024, deSerializer.maxLength()); // the 32MB bound stated in BitmapSerializer.maxLength()'s comment
+        System.out.println("counter size " + deSerializer.getStorageBytesEstimate());
+    }
+}
\ No newline at end of file
diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json index 99fed60..16603de 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json @@ -132,6 +132,17 @@ }, "dependent_measure_ref" : null }, { + "name": "LEAF_CATEG_ID_BITMAP", + "function": { + "expression": "COUNT_DISTINCT", + "parameter": { + "type": "column", + "value": "LEAF_CATEG_ID" + }, + "returntype": "bitmap" + }, + "dependent_measure_ref": null + }, { "name" : "TOP_SELLER", "function" : { "expression" : "TOP_N", @@ -190,7 +201,7 @@ "name" : "f2", "columns" : [ { "qualifier" : "m", - "measure_refs" : [ "seller_cnt_hll", "seller_format_cnt" ] + "measure_refs" : [ "seller_cnt_hll", "seller_format_cnt", "leaf_categ_id_bitmap" ] } ] }, { "name" : "f3", diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json index 79e5799..0b82d1e 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json @@ -132,6 +132,17 @@ }, "dependent_measure_ref" : null }, { + "name": "LEAF_CATEG_ID_BITMAP", + "function": { + "expression": "COUNT_DISTINCT", + "parameter": { + "type": "column", + "value": "LEAF_CATEG_ID" + }, + "returntype": "bitmap" + }, + "dependent_measure_ref": null + }, { "name" : "TOP_SELLER", "function" : { "expression" : "TOP_N", @@ -190,7 +201,7 @@ "name" : "f2", "columns" : [ { "qualifier" : "m", - "measure_refs" : [ "seller_cnt_hll", "seller_format_cnt" ] + "measure_refs" : [ "seller_cnt_hll", "seller_format_cnt", "leaf_categ_id_bitmap" ] } ] }, { "name" : "f3", diff --git a/pom.xml b/pom.xml index cbbac81..6db15e9 100644 --- a/pom.xml +++ b/pom.xml @@ -89,6 +89,7 @@ 3.0.3 2.8.1 4.5 + (0.5.4,] 3.1.2.RELEASE @@ -457,11 +458,17 @@ curator-recipes ${curator.version} - - org.apache.httpcomponents - httpclient - ${apache-httpclient.version} - + + org.apache.httpcomponents + httpclient + ${apache-httpclient.version} + + + + org.roaringbitmap + RoaringBitmap + ${roaring.version} + diff --git a/query/src/test/java/org/apache/kylin/query/test/ITIIQueryTest.java b/query/src/test/java/org/apache/kylin/query/test/ITIIQueryTest.java index 344433a..0d0197d 100644 --- a/query/src/test/java/org/apache/kylin/query/test/ITIIQueryTest.java +++ b/query/src/test/java/org/apache/kylin/query/test/ITIIQueryTest.java @@ -26,6 +26,7 @@ import org.apache.kylin.metadata.realization.RealizationType; import org.apache.kylin.query.routing.Candidate; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -83,4 +84,9 @@ public class 
ITIIQueryTest extends ITKylinQueryTest { execAndCompQuery("src/test/resources/query/sql_ii", null, true); } + @Override + @Test + @Ignore("Skip Precisely Distinct Count Queries for II") + public void testPreciselyDistinctCountQuery() { + } } diff --git a/query/src/test/java/org/apache/kylin/query/test/ITKylinQueryTest.java b/query/src/test/java/org/apache/kylin/query/test/ITKylinQueryTest.java index 73e1263..f758356 100644 --- a/query/src/test/java/org/apache/kylin/query/test/ITKylinQueryTest.java +++ b/query/src/test/java/org/apache/kylin/query/test/ITKylinQueryTest.java @@ -162,6 +162,11 @@ public class ITKylinQueryTest extends KylinTestBase { } @Test + public void testPreciselyDistinctCountQuery() throws Exception { + execAndCompQuery("src/test/resources/query/sql_distinct_precisely", null, true); + } + + @Test public void testStreamingTableQuery() throws Exception { execAndCompQuery("src/test/resources/query/sql_streaming", null, true); } diff --git a/query/src/test/resources/query/sql_distinct_precisely/query00.sql b/query/src/test/resources/query/sql_distinct_precisely/query00.sql new file mode 100644 index 0000000..e1e4a9e --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query00.sql @@ -0,0 +1,24 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, cal_dt, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + group by lstg_format_name, cal_dt diff --git a/query/src/test/resources/query/sql_distinct_precisely/query01.sql b/query/src/test/resources/query/sql_distinct_precisely/query01.sql new file mode 100644 index 0000000..c1868b8 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query01.sql @@ -0,0 +1,25 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + where lstg_format_name='FP-GTC' + group by lstg_format_name diff --git a/query/src/test/resources/query/sql_distinct_precisely/query02.sql b/query/src/test/resources/query/sql_distinct_precisely/query02.sql new file mode 100644 index 0000000..5a3527a --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query02.sql @@ -0,0 +1,26 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + where lstg_format_name='FP-GTC' + group by lstg_format_name + having count(distinct seller_id) > 50 diff --git a/query/src/test/resources/query/sql_distinct_precisely/query03.sql b/query/src/test/resources/query/sql_distinct_precisely/query03.sql new file mode 100644 index 0000000..dacdc87 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query03.sql @@ -0,0 +1,33 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. 
See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV + , count(1) as TRANS_CNT, count(distinct test_kylin_fact.leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + inner JOIN edw.test_cal_dt as test_cal_dt + ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt + inner JOIN test_category_groupings + on test_kylin_fact.leaf_categ_id = test_category_groupings.leaf_categ_id and + test_kylin_fact.lstg_site_id = test_category_groupings.site_id + inner JOIN edw.test_sites as test_sites + on test_kylin_fact.lstg_site_id = test_sites.site_id + inner JOIN edw.test_seller_type_dim as test_seller_type_dim + on test_kylin_fact.slr_segment_cd = test_seller_type_dim.seller_type_cd + where test_kylin_fact.lstg_format_name='FP-GTC' + and test_cal_dt.week_beg_dt between DATE '2013-05-01' and DATE '2013-08-01' + group by test_cal_dt.week_beg_dt diff --git a/query/src/test/resources/query/sql_distinct_precisely/query04.sql b/query/src/test/resources/query/sql_distinct_precisely/query04.sql new file mode 100644 index 0000000..ff511c3 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query04.sql @@ -0,0 +1,34 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. 
See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV + , count(1) as TRANS_CNT, count(distinct test_kylin_fact.leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + inner JOIN edw.test_cal_dt as test_cal_dt + ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt + inner JOIN test_category_groupings + on test_kylin_fact.leaf_categ_id = test_category_groupings.leaf_categ_id and + test_kylin_fact.lstg_site_id = test_category_groupings.site_id + inner JOIN edw.test_sites as test_sites + on test_kylin_fact.lstg_site_id = test_sites.site_id + inner JOIN edw.test_seller_type_dim as test_seller_type_dim + on test_kylin_fact.slr_segment_cd = test_seller_type_dim.seller_type_cd + where test_kylin_fact.lstg_format_name='FP-GTC' + and test_cal_dt.week_beg_dt between DATE '2013-05-01' and DATE '2013-08-01' + group by test_cal_dt.week_beg_dt + having count(distinct seller_id) > 2 diff --git a/query/src/test/resources/query/sql_distinct_precisely/query05.sql b/query/src/test/resources/query/sql_distinct_precisely/query05.sql new file mode 100644 index 0000000..3d5e5e8 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query05.sql @@ -0,0 +1,25 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. 
See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + group by lstg_format_name + order by lstg_format_name diff --git a/query/src/test/resources/query/sql_distinct_precisely/query06.sql b/query/src/test/resources/query/sql_distinct_precisely/query06.sql new file mode 100644 index 0000000..858c92e --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query06.sql @@ -0,0 +1,26 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + where lstg_format_name='FP-GTC' + group by lstg_format_name + order by lstg_format_name diff --git a/query/src/test/resources/query/sql_distinct_precisely/query07.sql b/query/src/test/resources/query/sql_distinct_precisely/query07.sql new file mode 100644 index 0000000..41252c4 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query07.sql @@ -0,0 +1,24 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + group by lstg_format_name diff --git a/storage-hbase/pom.xml b/storage-hbase/pom.xml index e16705e..164937d 100644 --- a/storage-hbase/pom.xml +++ b/storage-hbase/pom.xml @@ -139,6 +139,7 @@ org.apache.kylin:kylin-invertedindex com.ning:compress-lzf com.n3twork.druid:extendedset + org.roaringbitmap:RoaringBitmap diff --git a/webapp/app/js/model/cubeConfig.js b/webapp/app/js/model/cubeConfig.js index 8fc22fd..fe7e193 100644 --- a/webapp/app/js/model/cubeConfig.js +++ b/webapp/app/js/model/cubeConfig.js @@ -47,7 +47,8 @@ KylinApp.constant('cubeConfig', { {name: 'Error Rate < 4.88%', value: 'hllc12'}, {name: 'Error Rate < 2.44%', value: 'hllc14'}, {name: 'Error Rate < 1.72%', value: 'hllc15'}, - {name: 'Error Rate < 1.22%', value: 'hllc16'} + {name: 'Error Rate < 1.22%', value: 'hllc16'}, + {name: 'Precisely (Only for Integer Family column)', value: 'bitmap'} ], dftSelections: { measureExpression: 'SUM', -- 2.3.2 (Apple Git-55)