diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SortByShuffler.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SortByShuffler.java index 997ab7e..0e08a27 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SortByShuffler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SortByShuffler.java @@ -80,7 +80,7 @@ public String getName() { // Use input iterator to back returned iterable object. return new Iterator>>() { HiveKey curKey = null; - List curValues = new ArrayList(); + HiveKVResultCache curValues = new HiveKVResultCache(); @Override public boolean hasNext() { @@ -89,21 +89,18 @@ public boolean hasNext() { @Override public Tuple2> next() { - // TODO: implement this by accumulating rows with the same key into a list. - // Note that this list needs to improved to prevent excessive memory usage, but this - // can be done in later phase. while (it.hasNext()) { Tuple2 pair = it.next(); if (curKey != null && !curKey.equals(pair._1())) { HiveKey key = curKey; - List values = curValues; + HiveKVResultCache values = curValues; curKey = pair._1(); - curValues = new ArrayList(); - curValues.add(pair._2()); - return new Tuple2>(key, values); + curValues = new HiveKVResultCache(); + curValues.add(curKey, pair._2()); + return new Tuple2>(key, new KeyGroupValueIterable(values)); } curKey = pair._1(); - curValues.add(pair._2()); + curValues.add(curKey, pair._2()); } if (curKey == null) { throw new NoSuchElementException(); @@ -111,7 +108,7 @@ public boolean hasNext() { // if we get here, this should be the last element we have HiveKey key = curKey; curKey = null; - return new Tuple2>(key, curValues); + return new Tuple2>(key, new KeyGroupValueIterable(curValues)); } @Override @@ -126,3 +123,34 @@ public void remove() { } } + +class KeyGroupValueIterable implements Iterable { + private final HiveKVResultCache cache; + + public KeyGroupValueIterable(HiveKVResultCache cache) { + this.cache = cache; + } + + @Override + public Iterator iterator() { + return new Iterator() { + + @Override + public boolean hasNext() { + return cache.hasNext(); + } + + @Override + public BytesWritable next() { + return cache.next()._2(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + }; + } + +} diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 2ab2541..aa675a9 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -4912,27 +4912,16 @@ POSTHOOK: query: select * from part_4 POSTHOOK: type: QUERY POSTHOOK: Input: default@part_4 #### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 -Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 -Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 -Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 -Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 -Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 -Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#3 almond antique forest lavender goldenrod 14 1 1 1190.27 +Manufacturer#3 almond antique metallic orange dim 19 2 2 2600.66 +Manufacturer#3 almond antique misty red olive 1 3 3 4523.639999999999 +Manufacturer#3 almond antique olive coral navajo 45 4 4 5860.929999999999 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 -Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 -Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 -Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 -Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 @@ -4952,26 +4941,11 @@ Manufacturer#1 almond antique chartreuse lavender yellow 34 34 3 2 0.5 2 Manufacturer#1 almond antique salmon chartreuse burlywood 6 10 4 3 0.6666666666666666 2 Manufacturer#1 almond aquamarine burnished black steel 28 28 5 4 0.8333333333333334 34 Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 5 1.0 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 14 1 1 0.2 14 -Manufacturer#2 almond antique violet turquoise frosted 40 40 2 2 0.4 14 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 3 3 0.6 14 -Manufacturer#2 almond aquamarine rose maroon antique 25 25 4 4 0.8 40 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 32 5 5 1.0 2 -Manufacturer#3 almond antique chartreuse khaki white 17 31 1 1 0.2 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 14 2 2 0.4 17 -Manufacturer#3 almond antique metallic orange dim 19 50 3 3 0.6 17 -Manufacturer#3 almond antique misty red olive 1 1 4 4 0.8 14 -Manufacturer#3 almond antique olive coral navajo 45 45 5 5 1.0 19 -Manufacturer#4 almond antique gainsboro frosted violet 10 17 1 1 0.2 10 -Manufacturer#4 almond antique violet mint lemon 39 39 2 2 0.4 10 -Manufacturer#4 almond aquamarine floral ivory bisque 27 27 3 3 0.6 10 -Manufacturer#4 almond aquamarine yellow dodger mint 7 7 4 4 0.8 39 -Manufacturer#4 almond azure aquamarine papaya violet 12 29 5 5 1.0 27 -Manufacturer#5 almond antique blue firebrick mint 31 31 1 1 0.2 31 -Manufacturer#5 almond antique medium spring khaki 6 8 2 2 0.4 31 -Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 -Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 -Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 +Manufacturer#3 almond antique chartreuse khaki white 17 17 1 1 1.0 17 +Manufacturer#4 almond antique violet mint lemon 39 39 1 1 0.25 39 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 2 2 0.5 39 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 3 3 0.75 39 +Manufacturer#4 almond azure aquamarine papaya violet 12 19 4 4 1.0 27 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap explain extended