From e5b94d7b123acfa4cd66915670200c30b32b983c Mon Sep 17 00:00:00 2001
From: xiefan46 <958034172@qq.com>
Date: Tue, 25 Apr 2017 18:10:09 +0800
Subject: [PATCH] KYLIN-2249 Bug fix: cube build fails when using inmem but
 succeeds with layer

---
 .../apache/kylin/dict/TrieDictionaryBuilder.java   | 31 +++++++++++++++-------
 .../apache/kylin/dict/TrieDictionaryForest.java    | 14 +++++-----
 .../kylin/dict/TrieDictionaryForestBuilder.java    |  2 +-
 .../kylin/dict/TrieDictionaryForestTest.java       | 11 ++++++++
 .../org/apache/kylin/dict/TrieDictionaryTest.java  | 10 +++++++
 5 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java
index 102c49e..69f3067 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java
@@ -34,11 +34,11 @@ import org.apache.kylin.common.util.BytesUtil;
  * Builds a dictionary using Trie structure. All values are taken in byte[] form
  * and organized in a Trie with ordering. Then numeric IDs are assigned in
  * sequence.
- * 
+ *
  * @author yangli9
  */
 public class TrieDictionaryBuilder<T> {
-    
+
     private static final int _2GB = 2000000000;
 
     public static class Node {
@@ -76,6 +76,8 @@ public class TrieDictionaryBuilder<T> {
     private Node root;
     protected BytesConverter<T> bytesConverter;
 
+    private boolean hasValue = false;
+
     public TrieDictionaryBuilder(BytesConverter<T> bytesConverter) {
         this.root = new Node(new byte[0], false);
         this.bytesConverter = bytesConverter;
@@ -91,6 +93,7 @@ public class TrieDictionaryBuilder<T> {
     }
 
     private void addValueR(Node node, byte[] value, int start) {
+        hasValue = true;
         // match the value part of current node
         int i = 0, j = start;
         int n = node.part.length, nn = value.length;
@@ -180,9 +183,9 @@ public class TrieDictionaryBuilder<T> {
     public static class Stats {
         public int nValues; // number of values in total
         public int nValueBytesPlain; // number of bytes for all values
-                                     // uncompressed
+        // uncompressed
         public int nValueBytesCompressed; // number of values bytes in Trie
-                                          // (compressed)
+        // (compressed)
         public int maxValueLength; // size of longest value in bytes
 
         // the trie is multi-byte-per-node
@@ -234,7 +237,13 @@ public class TrieDictionaryBuilder<T> {
         }
     }
 
-    /** out print some statistics of the trie and the dictionary built from it */
+    public boolean isHasValue() { // getter for hasValue: starts false and is set to true whenever addValueR runs
+        return hasValue;
+    }
+
+    /**
+     * Output some statistics of the trie and the dictionary built from it.
+     */
     public Stats stats() {
         // calculate nEndValueBeneath
         traversePostOrder(new Visitor() {
@@ -313,7 +322,9 @@ public class TrieDictionaryBuilder<T> {
         return s;
     }
 
-    /** out print trie for debug */
+    /**
+     * Print the trie for debugging.
+     */
     public void print() {
         print(System.out);
     }
@@ -396,11 +407,11 @@ public class TrieDictionaryBuilder<T> {
     /**
      * Flatten the trie into a byte array for a minimized memory footprint.
      * Lookup remains fast. Cost is inflexibility to modify (becomes immutable).
-     * 
+     * <p>
      * Flattened node structure is HEAD + NODEs, for each node:
      * - o byte, offset to child node, o = stats.mbpn_sizeChildOffset
-     *    - 1 bit, isLastChild flag, the 1st MSB of o
-     *    - 1 bit, isEndOfValue flag, the 2nd MSB of o
+     *   - 1 bit, isLastChild flag, the 1st MSB of o
+     *   - 1 bit, isEndOfValue flag, the 2nd MSB of o
      * - c byte, number of values beneath, c = stats.mbpn_sizeNoValueBeneath
      * - 1 byte, number of value bytes
      * - n byte, value bytes
@@ -417,7 +428,7 @@ public class TrieDictionaryBuilder<T> {
         Stats stats = stats();
         int sizeNoValuesBeneath = stats.mbpn_sizeNoValueBeneath;
         int sizeChildOffset = stats.mbpn_sizeChildOffset;
-        
+
         if (stats.mbpn_footprint <= 0) // must never happen, but let us be cautious
             throw new IllegalStateException("Too big dictionary, dictionary cannot be bigger than 2GB");
         if (stats.mbpn_footprint > _2GB)
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
index 1023892..09d5bc2 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
@@ -63,7 +63,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
     }
 
     public TrieDictionaryForest(ArrayList<TrieDictionary<T>> trees, ArrayList<ByteArray> valueDivide, //
-            ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) {
+                                ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) {
         init(trees, valueDivide, accuOffset, bytesConverter, baseId);
     }
 
@@ -342,7 +342,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         initSizeOfValue();
     }
 
-    private void initMaxValueForEachTrie(){
+    private void initMaxValueForEachTrie() {
         //init max value
         this.maxValue = new ArrayList<>();
         if (this.trees == null || trees.isEmpty()) {
@@ -356,7 +356,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         }
     }
 
-    private void initMaxId(){
+    private void initMaxId() {
         if (trees.isEmpty()) {
             this.maxId = baseId - 1;
             return;
@@ -365,7 +365,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         this.maxId = accuOffset.get(index) + trees.get(index).getMaxId() + baseId;
     }
 
-    private void initMinId(){
+    private void initMinId() {
         if (trees.isEmpty()) {
             this.minId = baseId;
             return;
@@ -373,8 +373,8 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         this.minId = trees.get(0).getMinId() + baseId;
     }
 
-    private void initSizeOfId(){
-        if (trees.isEmpty()){
+    private void initSizeOfId() {
+        if (trees.isEmpty()) {
             this.sizeOfId = 1;
             return;
         }
@@ -383,7 +383,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         this.sizeOfId = BytesUtil.sizeForValue(baseId + maxOffset + lastTree.getMaxId() + 1L);
     }
 
-    private void initSizeOfValue(){
+    private void initSizeOfValue() {
         int maxValue = 0;
         for (TrieDictionary<T> tree : trees)
             maxValue = Math.max(maxValue, tree.getSizeOfValue());
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
index 69da472..0e5e63e 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
@@ -105,7 +105,7 @@ public class TrieDictionaryForestBuilder<T> {
     }
 
     public TrieDictionaryForest<T> build() {
-        if (curTreeSize != 0) { //last tree
+        if (trieBuilder.isHasValue()) { //last tree
             TrieDictionary<T> tree = trieBuilder.build(0);
             addTree(tree);
             reset();
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index dd1f951..82380b3 100755
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -133,6 +133,17 @@ public class TrieDictionaryForestTest {
     }
 
     @Test
+    public void testAllNullValue() { // NOTE: despite the name, the only input below is an empty string, not null
+        ArrayList<String> strs = new ArrayList<String>();
+        strs.add(""); // the empty string is the single value fed to the forest builder
+        int maxTreeSize = 10;
+        TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, 0, maxTreeSize); // second argument is presumably the base id -- confirm against newDictBuilder
+        TrieDictionaryForest<String> dict = builder.build();
+        assertEquals(1, dict.getSize()); // the built forest must report exactly one entry
+        assertEquals(0, dict.getIdFromValue("")); // and must map the empty string to id 0
+    }
+
+    @Test
     public void testBigDataSet() {
         //h=generate data
         ArrayList<String> strs = new ArrayList<>();
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
index 22a93a0..13c83ac 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
@@ -221,6 +221,16 @@ public class TrieDictionaryTest {
         testStringDictionary(str, null);
     }
 
+    @Test
+    public void testAllNullValue() { // NOTE: despite the name, the only input below is an empty string, not null
+        ArrayList<String> strs = new ArrayList<String>();
+        strs.add(""); // the empty string is the single value fed to the trie builder
+        TrieDictionaryBuilder<String> builder = newDictBuilder(strs);
+        TrieDictionary<String> dict = builder.build(0); // same call shape as trieBuilder.build(0) in TrieDictionaryForestBuilder.build
+        assertEquals(1, dict.getSize()); // the built dictionary must report exactly one entry
+        assertEquals(0, dict.getIdFromValue("")); // and must map the empty string to id 0
+    }
+
     private static void benchmarkStringDictionary(Iterable<String> str) throws IOException {
         TrieDictionaryBuilder<String> b = newDictBuilder(str);
         b.stats().print();
-- 
2.9.3

