diff --git a/common/pom.xml b/common/pom.xml index 023f084511..fb80db7706 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -226,6 +226,11 @@ dropwizard-metrics-hadoop-metrics2-reporter ${dropwizard-metrics-hadoop-metrics2-reporter.version} + + javolution + javolution + ${javolution.version} + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java similarity index 89% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java rename to common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java index fa70f49857..e20d29954a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java @@ -15,21 +15,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.ql.udf.generic; +package org.apache.hadoop.hive.common.ndv; + import java.util.Random; import javolution.util.FastBitSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.apache.hadoop.io.Text; -public class NumDistinctValueEstimator { +public class FMSketch implements NumDistinctValueEstimator{ - static final Logger LOG = LoggerFactory.getLogger(NumDistinctValueEstimator.class.getName()); + static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName()); /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. @@ -38,7 +39,6 @@ * thus introducing errors in the estimates. 
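+ * (For reference: 2^31 - 1 = 2147483647, which is a Mersenne prime.)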
*/ private static final int BIT_VECTOR_SIZE = 31; - private final int numBitVectors; // Refer to Flajolet-Martin'86 for the value of phi private static final double PHI = 0.77351; @@ -49,10 +49,12 @@ private final Random aValue; private final Random bValue; + + private int numBitVectors; /* Create a new distinctValueEstimator */ - public NumDistinctValueEstimator(int numBitVectors) { + public FMSketch(int numBitVectors) { this.numBitVectors = numBitVectors; bitVector = new FastBitSet[numBitVectors]; for (int i=0; i< numBitVectors; i++) { @@ -109,9 +111,9 @@ public NumDistinctValueEstimator(int numBitVectors) { } } - public NumDistinctValueEstimator(String s, int numBitVectors) { + public FMSketch(String s, int numBitVectors) { this.numBitVectors = numBitVectors; - FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors); + FastBitSet bitVectorDeser[] = genBitSet(s, numBitVectors); bitVector = new FastBitSet[numBitVectors]; for(int i=0; i >> p; + + // longest run of trailing zeroes + final int lr = Long.numberOfTrailingZeros(w) + 1; + return set(registerIdx, (byte) lr); + } + + public boolean set(int idx, byte value) { + boolean updated = false; + if (idx < register.length && value > register[idx]) { + + // update max register value + if (value > maxRegisterValue) { + maxRegisterValue = value; + } + + // update number of zeros + if (register[idx] == 0 && value > 0) { + numZeroes--; + } + + // set register value and compute inverse pow of 2 for register value + register[idx] = value; + invPow2Register[idx] = Math.pow(2, -value); + + updated = true; + } + return updated; + } + + public int size() { + return register.length; + } + + public int getNumZeroes() { + return numZeroes; + } + + public void merge(HLLRegister hllRegister) { + if (hllRegister instanceof HLLDenseRegister) { + HLLDenseRegister hdr = (HLLDenseRegister) hllRegister; + byte[] inRegister = hdr.getRegister(); + + // merge only if the register length matches + if (register.length != inRegister.length) { + throw new IllegalArgumentException( + "The size of register sets of HyperLogLogs to be merged does not match."); + } + + // compare register values and store the max register value + for (int i = 0; i < inRegister.length; i++) { + if (inRegister[i] > register[i]) { + if (register[i] == 0) { + numZeroes--; + } + register[i] = inRegister[i]; + invPow2Register[i] = Math.pow(2, -inRegister[i]); + } + } + + // update max register value + if (hdr.getMaxRegisterValue() > maxRegisterValue) { + maxRegisterValue = hdr.getMaxRegisterValue(); + } + } else { + throw new IllegalArgumentException("Specified register is not instance of HLLDenseRegister"); + } + } + + public byte[] getRegister() { + return register; + } + + public void setRegister(byte[] register) { + this.register = register; + } + + public int getMaxRegisterValue() { + return maxRegisterValue; + } + + public double getSumInversePow2() { + double sum = 0; + for (double d : invPow2Register) { + sum += d; + } + return sum; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("HLLDenseRegister - "); + sb.append("p: "); + sb.append(p); + sb.append(" numZeroes: "); + sb.append(numZeroes); + sb.append(" maxRegisterValue: "); + sb.append(maxRegisterValue); + return sb.toString(); + } + + public String toExtendedString() { + return toString() + " register: " + Arrays.toString(register); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof HLLDenseRegister)) { + return false; + } + HLLDenseRegister other = 
(HLLDenseRegister) obj; + return numZeroes == other.numZeroes && maxRegisterValue == other.maxRegisterValue + && Arrays.equals(register, other.register); + } + + @Override + public int hashCode() { + int hashcode = 0; + hashcode += 31 * numZeroes; + hashcode += 31 * maxRegisterValue; + hashcode += Arrays.hashCode(register); + return hashcode; + } + +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java new file mode 100644 index 0000000000..eefc60fbd6 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.ndv.hll; + +public interface HLLRegister { + + /** + * Specify a hashcode to add to hyperloglog register. + * @param hashcode + * - hashcode to add + * @return true if register value is updated else false + */ + public boolean add(long hashcode); + + /** + * Instead of specifying hashcode, this interface can be used to directly + * specify the register index and register value. This interface is useful + * when reconstructing hyperloglog from a serialized representation where its + * not possible to regenerate the hashcode. + * @param idx + * - register index + * @param value + * - register value + * @return true if register value is updated else false + */ + public boolean set(int idx, byte value); + + /** + * Merge hyperloglog registers of the same type (SPARSE or DENSE register) + * @param reg + * - register to be merged + */ + public void merge(HLLRegister reg); +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java new file mode 100644 index 0000000000..a4a5ba9015 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java @@ -0,0 +1,260 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.common.ndv.hll; + +import java.util.Map; +import java.util.TreeMap; + +public class HLLSparseRegister implements HLLRegister { + + private TreeMap sparseMap; + + // for a better insertion performance values are added to temporary unsorted + // list which will be merged to sparse map after a threshold + private int[] tempList; + private int tempListIdx; + + // number of register bits + private final int p; + + // new number of register bits for higher accuracy + private final int pPrime; + + // number of bits to store the number of zero runs + private final int qPrime; + + // masks for quicker extraction of p, pPrime, qPrime values + private final int mask; + private final int pPrimeMask; + private final int qPrimeMask; + + public HLLSparseRegister(int p, int pp, int qp) { + this.p = p; + this.sparseMap = new TreeMap(); + this.tempList = new int[HLLConstants.TEMP_LIST_DEFAULT_SIZE]; + this.tempListIdx = 0; + this.pPrime = pp; + this.qPrime = qp; + this.mask = ((1 << pPrime) - 1) ^ ((1 << p) - 1); + this.pPrimeMask = ((1 << pPrime) - 1); + this.qPrimeMask = (1 << qPrime) - 1; + } + + public boolean add(long hashcode) { + boolean updated = false; + + // fill the temp list before merging to sparse map + if (tempListIdx < tempList.length) { + int encodedHash = encodeHash(hashcode); + tempList[tempListIdx++] = encodedHash; + updated = true; + } else { + updated = mergeTempListToSparseMap(); + } + + return updated; + } + + /** + * Adds temp list to sparse map. The key for sparse map entry is the register + * index determined by pPrime and value is the number of trailing zeroes. + * @return + */ + private boolean mergeTempListToSparseMap() { + boolean updated = false; + for (int i = 0; i < tempListIdx; i++) { + int encodedHash = tempList[i]; + int key = encodedHash & pPrimeMask; + byte value = (byte) (encodedHash >>> pPrime); + byte nr = 0; + // if MSB is set to 1 then next qPrime MSB bits contains the value of + // number of zeroes. + // if MSB is set to 0 then number of zeroes is contained within pPrime - p + // bits. + if (encodedHash < 0) { + nr = (byte) (value & qPrimeMask); + } else { + nr = (byte) (Integer.numberOfTrailingZeros(encodedHash >>> p) + 1); + } + updated = set(key, nr); + } + + // reset temp list index + tempListIdx = 0; + return updated; + } + + /** + *
+   * Input: 64-bit hashcode
+   * 
+   * |---------w-------------| |------------p'----------|
+   * 10101101.......1010101010 10101010101 01010101010101
+   *                                       |------p-----|
+   *                                       
+   * Output: 32-bit int
+   * 
+   * |b| |-q'-|  |------------p'----------|
+   *  1  010101  01010101010 10101010101010
+   *                         |------p-----|
+   *                    
+   * 
+   * The default values of p', q' and b are 25, 6 and 1 (total 32 bits), respectively.
+   * This function returns an int encoded in the following format:
+   * 
+   * p  - LSB p bits represent the register index
+   * p' - LSB p' bits are used for increased accuracy in estimation
+   * q' - q' bits after p' are taken as-is from the hashcode if b = 0; otherwise
+   *      q' bits encode the longest run of trailing zeroes in the (w-p) input bits
+   * b  - 0 if longest trailing zero run is contained within (p'-p) bits
+   *      1 if longest trailing zero run is computed from the (w-p) input bits and
+   *      its value is stored in q' bits
+   * 
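+   * 
+   * A worked sketch of encodeHash() (assuming the defaults p = 14, p' = 25):
+   * if bits p..p'-1 of the hashcode are all zero, the trailing zero run cannot
+   * be recovered from the (p'-p) bits alone, so b is set to 1 and
+   * Long.numberOfTrailingZeros(hashcode >> p) + 1 is stored in the q' bits;
+   * otherwise b stays 0 and the low 31 bits of the hashcode are returned as-is.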
+ * @param hashcode + * @return + */ + public int encodeHash(long hashcode) { + // x = p' - p + int x = (int) (hashcode & mask); + if (x == 0) { + // more bits should be considered for finding q (longest zero runs) + // set MSB to 1 + int ntr = Long.numberOfTrailingZeros(hashcode >> p) + 1; + long newHashCode = hashcode & pPrimeMask; + newHashCode |= ntr << pPrime; + newHashCode |= 0x80000000; + return (int) newHashCode; + } else { + // q is contained within p' - p + // set MSB to 0 + return (int) (hashcode & 0x7FFFFFFF); + } + } + + public int getSize() { + + // merge temp list before getting the size of sparse map + if (tempListIdx != 0) { + mergeTempListToSparseMap(); + } + return sparseMap.size(); + } + + public void merge(HLLRegister hllRegister) { + if (hllRegister instanceof HLLSparseRegister) { + HLLSparseRegister hsr = (HLLSparseRegister) hllRegister; + + // retain only the largest value for a register index + for (Map.Entry entry : hsr.getSparseMap().entrySet()) { + int key = entry.getKey(); + byte value = entry.getValue(); + set(key, value); + } + } else { + throw new IllegalArgumentException("Specified register not instance of HLLSparseRegister"); + } + } + + public boolean set(int key, byte value) { + boolean updated = false; + + // retain only the largest value for a register index + if (sparseMap.containsKey(key)) { + byte containedVal = sparseMap.get(key); + if (value > containedVal) { + sparseMap.put(key, value); + updated = true; + } + } else { + sparseMap.put(key, value); + updated = true; + } + return updated; + } + + public TreeMap getSparseMap() { + return sparseMap; + } + + public TreeMap getMergedSparseMap() { + if (tempListIdx != 0) { + mergeTempListToSparseMap(); + } + return sparseMap; + } + + public int getP() { + return p; + } + + public int getPPrime() { + return pPrime; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("HLLSparseRegister - "); + sb.append("p: "); + sb.append(p); + sb.append(" pPrime: "); + sb.append(pPrime); + sb.append(" qPrime: "); + sb.append(qPrime); + return sb.toString(); + } + + public String toExtendedString() { + return toString() + " register: " + sparseMap.toString(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof HLLSparseRegister)) { + return false; + } + HLLSparseRegister other = (HLLSparseRegister) obj; + boolean result = p == other.p && pPrime == other.pPrime && qPrime == other.qPrime + && tempListIdx == other.tempListIdx; + if (result) { + for (int i = 0; i < tempListIdx; i++) { + if (tempList[i] != other.tempList[i]) { + return false; + } + } + + result = result && sparseMap.equals(other.sparseMap); + } + return result; + } + + @Override + public int hashCode() { + int hashcode = 0; + hashcode += 31 * p; + hashcode += 31 * pPrime; + hashcode += 31 * qPrime; + for (int i = 0; i < tempListIdx; i++) { + hashcode += 31 * tempList[tempListIdx]; + } + hashcode += sparseMap.hashCode(); + return hashcode; + } + +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java new file mode 100644 index 0000000000..d1955468a6 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java @@ -0,0 +1,629 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.ndv.hll; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hive.common.util.Murmur3; + +/** + *
+ * This is an implementation of the following variants of hyperloglog (HLL)
+ * algorithm:
+ * Original  - Original HLL algorithm by Flajolet et al., from
+ *             http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
+ * HLLNoBias - Google's implementation of bias correction based on lookup table
+ *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
+ * HLL++     - Google's implementation of HLL++ algorithm that uses SPARSE registers
+ *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
+ * 
+ * Following are the constructor parameters that determine which algorithm is
+ * used
+ * numRegisterIndexBits - number of LSB hashcode bits to be used as register index.
+ *                        Default is 14. min = 4 and max = 16
+ * numHashBits - number of bits for hashcode. Fixed at 64 here (not configurable, for performance).
+ * encoding - Type of encoding to use (SPARSE or DENSE). The algorithm automatically
+ *            switches to DENSE beyond a threshold. Default: SPARSE
+ * enableBitPacking - Whether to enable bit packing. Bit packing improves compression
+ *                    at the cost of more CPU cycles. Default: true
+ * noBias - Use Google's bias table lookup for short range bias correction.
+ *          Enabling this significantly improves the estimation accuracy for short
+ *          range values. Default: true
+ * 
+ * 
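+ * A minimal usage sketch (all methods below exist in this class; the defaults
+ * are as documented above):
+ * 
+ *   HyperLogLog hll = HyperLogLog.builder().setNumRegisterIndexBits(14).build();
+ *   hll.addLong(42L);
+ *   hll.addString("foo");
+ *   long estimate = hll.count();
+ * 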
+ */ +public class HyperLogLog implements NumDistinctValueEstimator{ + private final static int DEFAULT_HASH_BITS = 64; + private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0}); + private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1}); + private final static ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES); + private final static ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES); + private final static ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES); + + public enum EncodingType { + SPARSE, DENSE + } + + // number of bits to address registers + private final int p; + + // number of registers - 2^p + private final int m; + + // refer paper + private float alphaMM; + + // enable/disable bias correction using table lookup + private final boolean noBias; + + // enable/disable bitpacking + private final boolean bitPacking; + + // Not making it configurable for perf reasons (avoid checks) + private final int chosenHashBits = DEFAULT_HASH_BITS; + + private HLLDenseRegister denseRegister; + private HLLSparseRegister sparseRegister; + + // counts are cached to avoid repeated complex computation. If register value + // is updated the count will be computed again. + private long cachedCount; + private boolean invalidateCount; + + private EncodingType encoding; + + // threshold to switch from SPARSE to DENSE encoding + private int encodingSwitchThreshold; + + private HyperLogLog(HyperLogLogBuilder hllBuilder) { + if (hllBuilder.numRegisterIndexBits < HLLConstants.MIN_P_VALUE + || hllBuilder.numRegisterIndexBits > HLLConstants.MAX_P_VALUE) { + throw new IllegalArgumentException("p value should be between " + HLLConstants.MIN_P_VALUE + + " to " + HLLConstants.MAX_P_VALUE); + } + this.p = hllBuilder.numRegisterIndexBits; + this.m = 1 << p; + this.noBias = hllBuilder.noBias; + this.bitPacking = hllBuilder.bitPacking; + + // the threshold should be less than 12K bytes for p = 14. + // The reason to divide by 5 is, in sparse mode after serialization the + // entriesin sparse map are compressed, and delta encoded as varints. The + // worst case size of varints are 5 bytes. Hence, 12K/5 ~= 2400 entries in + // sparse map. + if (bitPacking) { + this.encodingSwitchThreshold = ((m * 6) / 8) / 5; + } else { + // if bitpacking is disabled, all register values takes 8 bits and hence + // we can be more flexible with the threshold. For p=14, 16K/5 = 3200 + // entries in sparse map can be allowed. 
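+      // note: m / 3 works out to 5461 entries for p = 14, a looser bound than
+      // the 3200 figure mentioned above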
+ this.encodingSwitchThreshold = m / 3; + } + + // initializeAlpha(DEFAULT_HASH_BITS); + // alphaMM value for 128 bits hash seems to perform better for default 64 hash bits + this.alphaMM = 0.7213f / (1 + 1.079f / m); + // For efficiency alpha is multiplied by m^2 + this.alphaMM = this.alphaMM * m * m; + + this.cachedCount = -1; + this.invalidateCount = false; + this.encoding = hllBuilder.encoding; + if (encoding.equals(EncodingType.SPARSE)) { + this.sparseRegister = new HLLSparseRegister(p, HLLConstants.P_PRIME_VALUE, + HLLConstants.Q_PRIME_VALUE); + this.denseRegister = null; + } else { + this.sparseRegister = null; + this.denseRegister = new HLLDenseRegister(p, bitPacking); + } + } + + public static HyperLogLogBuilder builder() { + return new HyperLogLogBuilder(); + } + + public static class HyperLogLogBuilder { + private int numRegisterIndexBits = 14; + private EncodingType encoding = EncodingType.SPARSE; + private boolean bitPacking = true; + private boolean noBias = true; + + public HyperLogLogBuilder() { + } + + public HyperLogLogBuilder setNumRegisterIndexBits(int b) { + this.numRegisterIndexBits = b; + return this; + } + + public HyperLogLogBuilder setEncoding(EncodingType enc) { + this.encoding = enc; + return this; + } + + public HyperLogLogBuilder enableBitPacking(boolean b) { + this.bitPacking = b; + return this; + } + + public HyperLogLogBuilder enableNoBias(boolean nb) { + this.noBias = nb; + return this; + } + + public HyperLogLog build() { + return new HyperLogLog(this); + } + } + + // see paper for alpha initialization. + private void initializeAlpha(final int hashBits) { + if (hashBits <= 16) { + alphaMM = 0.673f; + } else if (hashBits <= 32) { + alphaMM = 0.697f; + } else if (hashBits <= 64) { + alphaMM = 0.709f; + } else { + alphaMM = 0.7213f / (float) (1 + 1.079f / m); + } + + // For efficiency alpha is multiplied by m^2 + alphaMM = alphaMM * m * m; + } + + public void addBoolean(boolean val) { + add(val ? HASH64_ONE : HASH64_ZERO); + } + + public void addByte(byte val) { + add(Murmur3.hash64(new byte[] {val})); + } + + public void addBytes(byte[] val) { + add(Murmur3.hash64(val)); + } + + public void addShort(short val) { + SHORT_BUFFER.putShort(0, val); + add(Murmur3.hash64(SHORT_BUFFER.array())); + } + + public void addInt(int val) { + INT_BUFFER.putInt(0, val); + add(Murmur3.hash64(INT_BUFFER.array())); + } + + public void addLong(long val) { + LONG_BUFFER.putLong(0, val); + add(Murmur3.hash64(LONG_BUFFER.array())); + } + + public void addFloat(float val) { + INT_BUFFER.putFloat(0, val); + add(Murmur3.hash64(INT_BUFFER.array())); + } + + public void addDouble(double val) { + LONG_BUFFER.putDouble(0, val); + add(Murmur3.hash64(LONG_BUFFER.array())); + } + + public void addChar(char val) { + SHORT_BUFFER.putChar(0, val); + add(Murmur3.hash64(SHORT_BUFFER.array())); + } + + /** + * Java's default charset will be used for strings. 
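+   * Note that relying on the platform default charset means the same string
+   * may hash differently across JVMs with different defaults; the Charset
+   * overload below avoids this.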
+ * @param val + * - input string + */ + public void addString(String val) { + add(Murmur3.hash64(val.getBytes())); + } + + public void addString(String val, Charset charset) { + add(Murmur3.hash64(val.getBytes(charset))); + } + + public void add(long hashcode) { + if (encoding.equals(EncodingType.SPARSE)) { + if (sparseRegister.add(hashcode)) { + invalidateCount = true; + } + + // if size of sparse map excess the threshold convert the sparse map to + // dense register and switch to DENSE encoding + if (sparseRegister.getSize() > encodingSwitchThreshold) { + encoding = EncodingType.DENSE; + denseRegister = sparseToDenseRegister(sparseRegister); + sparseRegister = null; + invalidateCount = true; + } + } else { + if (denseRegister.add(hashcode)) { + invalidateCount = true; + } + } + } + + public long estimateNumDistinctValues() { + return count(); + } + + public long count() { + // compute count only if the register values are updated else return the + // cached count + if (invalidateCount || cachedCount < 0) { + if (encoding.equals(EncodingType.SPARSE)) { + + // if encoding is still SPARSE use linear counting with increase + // accuracy (as we use pPrime bits for register index) + int mPrime = 1 << sparseRegister.getPPrime(); + cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSize()); + } else { + + // for DENSE encoding, use bias table lookup for HLLNoBias algorithm + // else fallback to HLLOriginal algorithm + double sum = denseRegister.getSumInversePow2(); + long numZeros = denseRegister.getNumZeroes(); + + // cardinality estimate from normalized bias corrected harmonic mean on + // the registers + cachedCount = (long) (alphaMM * (1.0 / sum)); + long pow = (long) Math.pow(2, chosenHashBits); + + // when bias correction is enabled + if (noBias) { + cachedCount = cachedCount <= 5 * m ? (cachedCount - estimateBias(cachedCount)) + : cachedCount; + long h = cachedCount; + if (numZeros != 0) { + h = linearCount(m, numZeros); + } + + if (h < getThreshold()) { + cachedCount = h; + } + } else { + // HLL algorithm shows stronger bias for values in (2.5 * m) range. + // To compensate for this short range bias, linear counting is used + // for values before this short range. The original paper also says + // similar bias is seen for long range values due to hash collisions + // in range >1/30*(2^32). For the default case, we do not have to + // worry about this long range bias as the paper used 32-bit hashing + // and we use 64-bit hashing as default. 2^64 values are too high to + // observe long range bias (hash collisions). 
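+        // e.g. with the default p = 14, m = 16384, so the linear counting
+        // branch below applies to estimates up to 2.5 * 16384 = 40960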
+ if (cachedCount <= 2.5 * m) { + + // for short range use linear counting + if (numZeros != 0) { + cachedCount = linearCount(m, numZeros); + } + } else if (chosenHashBits < 64 && cachedCount > (0.033333 * pow)) { + + // long range bias for 32-bit hashcodes + if (cachedCount > (1 / 30) * pow) { + cachedCount = (long) (-pow * Math.log(1.0 - (double) cachedCount / (double) pow)); + } + } + } + } + invalidateCount = false; + } + + return cachedCount; + } + + private long getThreshold() { + return (long) (HLLConstants.thresholdData[p - 4] + 0.5); + } + + /** + * Estimate bias from lookup table + * @param count + * - cardinality before bias correction + * @return cardinality after bias correction + */ + private long estimateBias(long count) { + double[] rawEstForP = HLLConstants.rawEstimateData[p - 4]; + + // compute distance and store it in sorted map + TreeMap estIndexMap = new TreeMap<>(); + double distance = 0; + for (int i = 0; i < rawEstForP.length; i++) { + distance = Math.pow(count - rawEstForP[i], 2); + estIndexMap.put(distance, i); + } + + // take top-k closest neighbors and compute the bias corrected cardinality + long result = 0; + double[] biasForP = HLLConstants.biasData[p - 4]; + double biasSum = 0; + int kNeighbors = HLLConstants.K_NEAREST_NEIGHBOR; + for (Map.Entry entry : estIndexMap.entrySet()) { + biasSum += biasForP[entry.getValue()]; + kNeighbors--; + if (kNeighbors <= 0) { + break; + } + } + + // 0.5 added for rounding off + result = (long) ((biasSum / HLLConstants.K_NEAREST_NEIGHBOR) + 0.5); + return result; + } + + public void setCount(long count) { + this.cachedCount = count; + this.invalidateCount = true; + } + + private long linearCount(int mVal, long numZeros) { + return (long) (Math.round(mVal * Math.log(mVal / ((double) numZeros)))); + } + + // refer paper + public double getStandardError() { + return 1.04 / Math.sqrt(m); + } + + public HLLDenseRegister getHLLDenseRegister() { + return denseRegister; + } + + public HLLSparseRegister getHLLSparseRegister() { + return sparseRegister; + } + + /** + * Reconstruct sparse map from serialized integer list + * @param reg + * - uncompressed and delta decoded integer list + */ + public void setHLLSparseRegister(int[] reg) { + for (int i : reg) { + int key = i >>> HLLConstants.Q_PRIME_VALUE; + byte value = (byte) (i & 0x3f); + sparseRegister.set(key, value); + } + } + + /** + * Reconstruct dense registers from byte array + * @param reg + * - unpacked byte array + */ + public void setHLLDenseRegister(byte[] reg) { + int i = 0; + for (byte b : reg) { + denseRegister.set(i, b); + i++; + } + } + + /** + * Merge the specified hyperloglog to the current one. Encoding switches + * automatically after merge if the encoding switch threshold is exceeded. + * @param hll + * - hyperloglog to be merged + * @throws IllegalArgumentException + */ + public void merge(HyperLogLog hll) { + if (p != hll.p || chosenHashBits != hll.chosenHashBits) { + throw new IllegalArgumentException( + "HyperLogLog cannot be merged as either p or hashbits are different. 
Current: " + + toString() + " Provided: " + hll.toString()); + } + + EncodingType otherEncoding = hll.getEncoding(); + + if (encoding.equals(EncodingType.SPARSE) && otherEncoding.equals(EncodingType.SPARSE)) { + sparseRegister.merge(hll.getHLLSparseRegister()); + // if after merge the sparse switching threshold is exceeded then change + // to dense encoding + if (sparseRegister.getSize() > encodingSwitchThreshold) { + encoding = EncodingType.DENSE; + denseRegister = sparseToDenseRegister(sparseRegister); + sparseRegister = null; + } + } else if (encoding.equals(EncodingType.DENSE) && otherEncoding.equals(EncodingType.DENSE)) { + denseRegister.merge(hll.getHLLDenseRegister()); + } else if (encoding.equals(EncodingType.SPARSE) && otherEncoding.equals(EncodingType.DENSE)) { + denseRegister = sparseToDenseRegister(sparseRegister); + denseRegister.merge(hll.getHLLDenseRegister()); + sparseRegister = null; + encoding = EncodingType.DENSE; + } else if (encoding.equals(EncodingType.DENSE) && otherEncoding.equals(EncodingType.SPARSE)) { + HLLDenseRegister otherDenseRegister = sparseToDenseRegister(hll.getHLLSparseRegister()); + denseRegister.merge(otherDenseRegister); + } + + invalidateCount = true; + } + + /** + * Converts sparse to dense hll register + * @param sparseRegister + * - sparse register to be converted + * @return converted dense register + */ + private HLLDenseRegister sparseToDenseRegister(HLLSparseRegister sparseRegister) { + if (sparseRegister == null) { + return null; + } + int p = sparseRegister.getP(); + int pMask = (1 << p) - 1; + HLLDenseRegister result = new HLLDenseRegister(p, bitPacking); + for (Map.Entry entry : sparseRegister.getSparseMap().entrySet()) { + int key = entry.getKey(); + int idx = key & pMask; + result.set(idx, entry.getValue()); + } + return result; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Encoding: "); + sb.append(encoding); + sb.append(", p: "); + sb.append(p); + sb.append(", estimatedCardinality: "); + sb.append(estimateNumDistinctValues()); + return sb.toString(); + } + + public String toStringExtended() { + if (encoding.equals(EncodingType.DENSE)) { + return toString() + ", " + denseRegister.toExtendedString(); + } else if (encoding.equals(EncodingType.SPARSE)) { + return toString() + ", " + sparseRegister.toExtendedString(); + } + + return toString(); + } + + public int getNumRegisterIndexBits() { + return p; + } + + public EncodingType getEncoding() { + return encoding; + } + + public void setEncoding(EncodingType encoding) { + this.encoding = encoding; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof HyperLogLog)) { + return false; + } + + HyperLogLog other = (HyperLogLog) obj; + long count = estimateNumDistinctValues(); + long otherCount = other.estimateNumDistinctValues(); + boolean result = p == other.p && chosenHashBits == other.chosenHashBits + && encoding.equals(other.encoding) && count == otherCount; + if (encoding.equals(EncodingType.DENSE)) { + result = result && denseRegister.equals(other.getHLLDenseRegister()); + } + + if (encoding.equals(EncodingType.SPARSE)) { + result = result && sparseRegister.equals(other.getHLLSparseRegister()); + } + return result; + } + + @Override + public int hashCode() { + int hashcode = 0; + hashcode += 31 * p; + hashcode += 31 * chosenHashBits; + hashcode += encoding.hashCode(); + hashcode += 31 * estimateNumDistinctValues(); + if (encoding.equals(EncodingType.DENSE)) { + hashcode += 31 * denseRegister.hashCode(); + } + 
+ if (encoding.equals(EncodingType.SPARSE)) { + hashcode += 31 * sparseRegister.hashCode(); + } + return hashcode; + } + + @Override + public void reset() { + } + + @Override + public String serialize() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // write bytes to bos ... + try { + HyperLogLogUtils.serializeHLL(bos, this); + } catch (IOException e) { + throw new RuntimeException(e); + } + return Base64.encodeBase64String(bos.toByteArray()); + } + + @Override + public NumDistinctValueEstimator deserialize(String s) { + InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); + try { + return HyperLogLogUtils.deserializeHLL(is); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void addToEstimator(long v) { + addLong(v); + } + + @Override + public void addToEstimator(String s) { + addString(s); + } + + @Override + public void addToEstimator(double d) { + addDouble(d); + } + + @Override + public void addToEstimator(HiveDecimal decimal) { + addDouble(decimal.doubleValue()); + } + + @Override + public void mergeEstimators(NumDistinctValueEstimator o) { + merge((HyperLogLog) o); + } + + @Override + public int lengthFor(JavaDataModel model) { + // 5 is the head, 1<

+ * |-4 byte-|------varlong----|varint (optional)|----------| + * --------------------------------------------------------- + * | header | estimated-count | register-length | register | + * --------------------------------------------------------- + * + * 4 byte header is encoded like below + * 3 bytes - HLL magic string to identify serialized stream + * 4 bits - p (number of bits to be used as register index) + * 1 - spare bit (not used) + * 3 bits - encoding (000 - sparse, 001..110 - n bit packing, 111 - no bit packing) + * + * Followed by header are 3 fields that are required for reconstruction + * of hyperloglog + * Estimated count - variable length long to store last computed estimated count. + * This is just for quick lookup without deserializing registers + * Register length - number of entries in the register (required only for + * for sparse representation. For bit-packing, the register + * length can be found from p) + * + * @param out + * - output stream to write to + * @param hll + * - hyperloglog that needs to be serialized + * @throws IOException + */ + public static void serializeHLL(OutputStream out, HyperLogLog hll) throws IOException { + + // write header + out.write(MAGIC); + int fourthByte = 0; + int p = hll.getNumRegisterIndexBits(); + fourthByte = (p & 0xff) << 4; + + int bitWidth = 0; + EncodingType enc = hll.getEncoding(); + + // determine bit width for bitpacking and encode it in header + if (enc.equals(EncodingType.DENSE)) { + int lzr = hll.getHLLDenseRegister().getMaxRegisterValue(); + bitWidth = getBitWidth(lzr); + + // the max value of number of zeroes for 64 bit hash can be encoded using + // only 6 bits. So we will disable bit packing for any values >6 + if (bitWidth > 6) { + fourthByte |= 7; + bitWidth = 8; + } else { + fourthByte |= (bitWidth & 7); + } + } + + // write fourth byte of header + out.write(fourthByte); + + // write estimated count + long estCount = hll.estimateNumDistinctValues(); + writeVulong(out, estCount); + + // serialize dense/sparse registers. Dense registers are bitpacked whereas + // sparse registers are delta and variable length encoded + if (enc.equals(EncodingType.DENSE)) { + byte[] register = hll.getHLLDenseRegister().getRegister(); + bitpackHLLRegister(out, register, bitWidth); + } else if (enc.equals(EncodingType.SPARSE)) { + TreeMap sparseMap = hll.getHLLSparseRegister().getSparseMap(); + + // write the number of elements in sparse map (required for + // reconstruction) + writeVulong(out, sparseMap.size()); + + // compute deltas and write the values as varints + int prev = 0; + for (Map.Entry entry : sparseMap.entrySet()) { + if (prev == 0) { + prev = (entry.getKey() << HLLConstants.Q_PRIME_VALUE) | entry.getValue(); + writeVulong(out, prev); + } else { + int curr = (entry.getKey() << HLLConstants.Q_PRIME_VALUE) | entry.getValue(); + int delta = curr - prev; + writeVulong(out, delta); + prev = curr; + } + } + } + } + + /** + * Refer serializeHLL() for format of serialization. 
This funtions + * deserializes the serialized hyperloglogs + * @param in + * - input stream + * @return deserialized hyperloglog + * @throws IOException + */ + public static HyperLogLog deserializeHLL(InputStream in) throws IOException { + checkMagicString(in); + int fourthByte = in.read() & 0xff; + int p = fourthByte >>> 4; + + // read type of encoding + int enc = fourthByte & 7; + EncodingType encoding = null; + int bitSize = 0; + if (enc == 0) { + encoding = EncodingType.SPARSE; + } else if (enc > 0 && enc < 7) { + bitSize = enc; + encoding = EncodingType.DENSE; + } else { + // bit packing disabled + bitSize = 8; + encoding = EncodingType.DENSE; + } + + // estimated count + long estCount = readVulong(in); + + HyperLogLog result = null; + if (encoding.equals(EncodingType.SPARSE)) { + result = HyperLogLog.builder().setNumRegisterIndexBits(p) + .setEncoding(EncodingType.SPARSE).build(); + int numRegisterEntries = (int) readVulong(in); + int[] reg = new int[numRegisterEntries]; + int prev = 0; + + // reconstruct the sparse map from delta encoded and varint input stream + if (numRegisterEntries > 0) { + prev = (int) readVulong(in); + reg[0] = prev; + } + int delta = 0; + int curr = 0; + for (int i = 1; i < numRegisterEntries; i++) { + delta = (int) readVulong(in); + curr = prev + delta; + reg[i] = curr; + prev = curr; + } + result.setHLLSparseRegister(reg); + } else { + + // explicitly disable bit packing + if (bitSize == 8) { + result = HyperLogLog.builder().setNumRegisterIndexBits(p) + .setEncoding(EncodingType.DENSE).enableBitPacking(false).build(); + } else { + result = HyperLogLog.builder().setNumRegisterIndexBits(p) + .setEncoding(EncodingType.DENSE).enableBitPacking(true).build(); + } + int m = 1 << p; + byte[] register = unpackHLLRegister(in, m, bitSize); + result.setHLLDenseRegister(register); + } + + result.setCount(estCount); + + return result; + } + + private static void bitpackHLLRegister(OutputStream out, byte[] register, int bitWidth) + throws IOException { + int bitsLeft = 8; + byte current = 0; + + if (bitWidth == 8) { + fastPathWrite(out, register); + return; + } + + // write the blob + for (byte value : register) { + int bitsToWrite = bitWidth; + while (bitsToWrite > bitsLeft) { + // add the bits to the bottom of the current word + current |= value >>> (bitsToWrite - bitsLeft); + // subtract out the bits we just added + bitsToWrite -= bitsLeft; + // zero out the bits above bitsToWrite + value &= (1 << bitsToWrite) - 1; + out.write(current); + current = 0; + bitsLeft = 8; + } + bitsLeft -= bitsToWrite; + current |= value << bitsLeft; + if (bitsLeft == 0) { + out.write(current); + current = 0; + bitsLeft = 8; + } + } + + out.flush(); + } + + private static void fastPathWrite(OutputStream out, byte[] register) throws IOException { + for (byte b : register) { + out.write(b); + } + } + + /** + * Unpack the bitpacked HyperLogLog register. 
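+   * A sketch of the decoding (for example, with bitSize = 6): register values
+   * are read 6 bits at a time across byte boundaries, so the first output
+   * value is the top 6 bits of the first byte read, masked to the register
+   * width.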
+ * @param in + * - input stream + * @param length + * - serialized length + * @return unpacked HLL register + * @throws IOException + */ + private static byte[] unpackHLLRegister(InputStream in, int length, int bitSize) + throws IOException { + int mask = (1 << bitSize) - 1; + int bitsLeft = 8; + + if (bitSize == 8) { + return fastPathRead(in, length); + } + + byte current = (byte) (0xff & in.read()); + + byte[] output = new byte[length]; + for (int i = 0; i < output.length; i++) { + byte result = 0; + int bitsLeftToRead = bitSize; + while (bitsLeftToRead > bitsLeft) { + result <<= bitsLeft; + result |= current & ((1 << bitsLeft) - 1); + bitsLeftToRead -= bitsLeft; + current = (byte) (0xff & in.read()); + bitsLeft = 8; + } + if (bitsLeftToRead > 0) { + result <<= bitsLeftToRead; + bitsLeft -= bitsLeftToRead; + result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1); + } + output[i] = (byte) (result & mask); + } + return output; + } + + private static byte[] fastPathRead(InputStream in, int length) throws IOException { + byte[] result = new byte[length]; + for (int i = 0; i < length; i++) { + result[i] = (byte) in.read(); + } + return result; + } + + /** + * Get estimated cardinality without deserializing HLL + * @param in + * - serialized HLL + * @return - cardinality + * @throws IOException + */ + public static long getEstimatedCountFromSerializedHLL(InputStream in) throws IOException { + checkMagicString(in); + in.read(); + return readVulong(in); + } + + /** + * Check if the specified input stream is actually a HLL stream + * @param in + * - input stream + * @throws IOException + */ + private static void checkMagicString(InputStream in) throws IOException { + byte[] magic = new byte[3]; + magic[0] = (byte) in.read(); + magic[1] = (byte) in.read(); + magic[2] = (byte) in.read(); + + if (!Arrays.equals(magic, MAGIC)) { + throw new IllegalArgumentException("The input stream is not a HyperLogLog stream."); + } + } + + /** + * Minimum bits required to encode the specified value + * @param val + * - input value + * @return + */ + private static int getBitWidth(int val) { + int count = 0; + while (val != 0) { + count++; + val = (byte) (val >>> 1); + } + return count; + } + + /** + * Return relative error between actual and estimated cardinality + * @param actualCount + * - actual count + * @param estimatedCount + * - estimated count + * @return relative error + */ + public static float getRelativeError(long actualCount, long estimatedCount) { + float err = (1.0f - ((float) estimatedCount / (float) actualCount)) * 100.0f; + return err; + } + + /** + * Write variable length encoded longs to output stream + * @param output + * - out stream + * @param value + * - long + * @throws IOException + */ + private static void writeVulong(OutputStream output, long value) throws IOException { + while (true) { + if ((value & ~0x7f) == 0) { + output.write((byte) value); + return; + } else { + output.write((byte) (0x80 | (value & 0x7f))); + value >>>= 7; + } + } + } + + /** + * Read variable length encoded longs from input stream + * @param in + * - input stream + * @return decoded long value + * @throws IOException + */ + private static long readVulong(InputStream in) throws IOException { + long result = 0; + long b; + int offset = 0; + do { + b = in.read(); + if (b == -1) { + throw new EOFException("Reading Vulong past EOF"); + } + result |= (0x7f & b) << offset; + offset += 7; + } while (b >= 0x80); + return result; + } + +} diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index c7afe2bc4a..9c954be978 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1724,7 +1724,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_STATS_COLLECT_SCANCOLS("hive.stats.collect.scancols", false, "Whether column accesses are tracked in the QueryPlan.\n" + "This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed."), - // standard error allowed for ndv estimates. A lower value indicates higher accuracy and a + HIVE_STATS_NDV_ALGO("hive.stats.ndv.algo", "hll", new PatternSet("hll", "fm"), + "hll and fm stand for HyperLogLog and FM-sketch, respectively for computing ndv."), + // standard error allowed for ndv estimates for FM-sketch. A lower value indicates higher accuracy and a // higher compute cost. HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0, "Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost. \n" + diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLNoBias.java b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLNoBias.java new file mode 100644 index 0000000000..30f5ca3e61 --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLNoBias.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.ndv.hll; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(value = Parameterized.class) +public class TestHLLNoBias { + + // 1.5% tolerance for long range bias (when no bias enabled) and 5% when (no + // bias is disabled) and + // 0.5% for short range bias + private float noBiaslongRangeTolerance = 2.0f; + private float biasedlongRangeTolerance = 5.0f; + private float shortRangeTolerance = 0.5f; + + private int size; + + public TestHLLNoBias(int n) { + this.size = n; + } + + @Parameters + public static Collection data() { + Object[][] data = new Object[][] { { 30000 }, { 41000 }, { 50000 }, { 60000 }, { 75000 }, + { 80000 }, { 81920 } }; + return Arrays.asList(data); + } + + @Test + public void testHLLAdd() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().build(); + int size = 100; + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + double threshold = size > 40000 ? 
noBiaslongRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + } + + @Test + public void testHLLAddHalfDistinct() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().build(); + int unique = size / 2; + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + long val = rand.nextInt(unique); + hashset.add(val); + hll.addLong(val); + } + double threshold = size > 40000 ? noBiaslongRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + assertEquals((double) hashset.size(), (double) hll.count(), delta); + } + + @Test + public void testHLLNoBiasDisabled() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().enableNoBias(false).build(); + int size = 100; + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + double threshold = size > 40000 ? biasedlongRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + } + + @Test + public void testHLLNoBiasDisabledHalfDistinct() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().enableNoBias(false).build(); + int unique = size / 2; + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + long val = rand.nextInt(unique); + hashset.add(val); + hll.addLong(val); + } + double threshold = size > 40000 ? biasedlongRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + assertEquals((double) hashset.size(), (double) hll.count(), delta); + } + +} diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLSerialization.java b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLSerialization.java new file mode 100644 index 0000000000..b4b8df1174 --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLSerialization.java @@ -0,0 +1,267 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.common.ndv.hll; + +import static org.junit.Assert.assertEquals; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; + +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType; +import org.junit.After; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(value = Parameterized.class) +public class TestHLLSerialization { + + private int size; + private File testFile; + private static final String pathPrefix = "."; + private static final int SEED = 100; + // 5% tolerance for long range bias and 2.5% for short range bias + private float longRangeTolerance = 5.0f; + private float shortRangeTolerance = 2.5f; + + public TestHLLSerialization(int n) { + this.size = n; + this.testFile = new File(pathPrefix + testCaseName.getMethodName() + "_" + size + ".hll"); + } + + @Parameters + public static Collection data() { + Object[][] data = new Object[][] { { 2 }, { 10 }, { 100 }, { 1000 }, { 2000 }, { 3000 }, + { 5000 }, { 6000 }, { 10000 }, { 100000 }, { 1000000 } }; + return Arrays.asList(data); + } + + @After + public void close() { + if (testFile.exists()) { + testFile.delete(); + } + } + + @Rule + public TestName testCaseName = new TestName(); + + @Test + public void testHLLSparseSerialization() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + Random rand = new Random(SEED); + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + } + + @Test + public void testHLLSparseSerializationHalfDistinct() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + Random rand = new Random(SEED); + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + int val = rand.nextInt(size / 2); + hll.addLong(val); + hashset.add(val); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + double threshold = size > 40000 ? 
longRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + assertEquals(hashset.size(), hll.count(), delta); + assertEquals(hashset.size(), deserializedHLL.count(), delta); + } + + @Test + public void testHLLSparseNoBitPacking() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE) + .enableBitPacking(false).build(); + Random rand = new Random(SEED); + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + } + + @Test + public void testHLLSparseNoBitPackingHalfDistinct() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE) + .enableBitPacking(false).build(); + Random rand = new Random(SEED); + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + int val = rand.nextInt(size / 2); + hll.addLong(val); + hashset.add(val); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + double threshold = size > 40000 ? 
longRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + assertEquals(hashset.size(), hll.count(), delta); + assertEquals(hashset.size(), deserializedHLL.count(), delta); + } + + @Test + public void testHLLDenseSerialization() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + Random rand = new Random(SEED); + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + } + + @Test + public void testHLLDenseSerializationHalfDistinct() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + Random rand = new Random(SEED); + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + int val = rand.nextInt(size / 2); + hll.addLong(val); + hashset.add(val); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + double threshold = size > 40000 ? 
longRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + assertEquals(hashset.size(), hll.count(), delta); + assertEquals(hashset.size(), deserializedHLL.count(), delta); + } + + @Test + public void testHLLDenseNoBitPacking() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).enableBitPacking(false) + .build(); + Random rand = new Random(SEED); + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + } + + @Test + public void testHLLDenseNoBitPackingHalfDistinct() throws IOException { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).enableBitPacking(false) + .build(); + Random rand = new Random(SEED); + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + int val = rand.nextInt(size / 2); + hll.addLong(val); + hashset.add(val); + } + FileOutputStream fos = new FileOutputStream(testFile); + DataOutputStream out = new DataOutputStream(fos); + HyperLogLogUtils.serializeHLL(out, hll); + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + FileInputStream fis = new FileInputStream(testFile); + DataInputStream in = new DataInputStream(fis); + HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); + assertEquals(hll, deserializedHLL); + assertEquals(hll.toString(), deserializedHLL.toString()); + assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended()); + assertEquals(hll.hashCode(), deserializedHLL.hashCode()); + assertEquals(hll.count(), deserializedHLL.count()); + assertEquals(hashset.size(), hll.count(), delta); + assertEquals(hashset.size(), deserializedHLL.count(), delta); + } +} diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java new file mode 100644 index 0000000000..635073fc26 --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java @@ -0,0 +1,227 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.ndv.hll; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType; +import org.junit.Test; + +public class TestHyperLogLog { + // 5% tolerance for estimated count + private float longRangeTolerance = 5.0f; + private float shortRangeTolerance = 2.0f; + + @Test(expected = IllegalArgumentException.class) + public void testHLLDenseMerge() { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + HyperLogLog hll2 = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16) + .setEncoding(EncodingType.DENSE).build(); + int size = 1000; + for (int i = 0; i < size; i++) { + hll.addLong(i); + hll2.addLong(size + i); + hll3.addLong(2 * size + i); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + assertEquals((double) size, (double) hll2.count(), delta); + + // merge + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // merge should update registers and hence the count + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // new merge + hll.merge(hll3); + assertEquals((double) 3 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // invalid merge -- register set size doesn't match + hll.merge(hll4); + } + + @Test(expected = IllegalArgumentException.class) + public void testHLLSparseMerge() { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll2 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16) + .setEncoding(EncodingType.SPARSE).build(); + int size = 500; + for (int i = 0; i < size; i++) { + hll.addLong(i); + hll2.addLong(size + i); + hll3.addLong(2 * size + i); + } + double threshold = size > 40000 ? 
longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + assertEquals((double) size, (double) hll2.count(), delta); + + // merge + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.SPARSE, hll.getEncoding()); + + // merge should update registers and hence the count + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.SPARSE, hll.getEncoding()); + + // new merge + hll.merge(hll3); + assertEquals((double) 3 * size, (double) hll.count(), delta); + assertEquals(EncodingType.SPARSE, hll.getEncoding()); + + // invalid merge -- register set size doesn't match + hll.merge(hll4); + } + + @Test(expected = IllegalArgumentException.class) + public void testHLLSparseDenseMerge() { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll2 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16) + .setEncoding(EncodingType.DENSE).build(); + int size = 1000; + for (int i = 0; i < size; i++) { + hll.addLong(i); + hll2.addLong(size + i); + hll3.addLong(2 * size + i); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + assertEquals((double) size, (double) hll2.count(), delta); + + // sparse-sparse merge + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.SPARSE, hll.getEncoding()); + + // merge should update registers and hence the count + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.SPARSE, hll.getEncoding()); + + // sparse-dense merge + hll.merge(hll3); + assertEquals((double) 3 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // invalid merge -- register set size doesn't match + hll.merge(hll4); + } + + @Test(expected = IllegalArgumentException.class) + public void testHLLDenseSparseMerge() { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + HyperLogLog hll2 = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build(); + HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16) + .setEncoding(EncodingType.SPARSE).build(); + int size = 1000; + for (int i = 0; i < size; i++) { + hll.addLong(i); + hll2.addLong(size + i); + hll3.addLong(2 * size + i); + } + double threshold = size > 40000 ? 
longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + assertEquals((double) size, (double) hll2.count(), delta); + + // dense-dense merge + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // merge should update registers and hence the count + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // dense-sparse merge + hll.merge(hll3); + assertEquals((double) 3 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // invalid merge -- register set size doesn't match + hll.merge(hll4); + } + + @Test(expected = IllegalArgumentException.class) + public void testHLLSparseOverflowMerge() { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll2 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build(); + HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16) + .setEncoding(EncodingType.SPARSE).build(); + int size = 1000; + for (int i = 0; i < size; i++) { + hll.addLong(i); + hll2.addLong(size + i); + hll3.addLong(2 * size + i); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + assertEquals((double) size, (double) hll2.count(), delta); + + // sparse-sparse merge + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.SPARSE, hll.getEncoding()); + + // merge should update registers and hence the count + hll.merge(hll2); + assertEquals((double) 2 * size, (double) hll.count(), delta); + assertEquals(EncodingType.SPARSE, hll.getEncoding()); + + // sparse-sparse merge overflows to dense + hll.merge(hll3); + assertEquals((double) 3 * size, (double) hll.count(), delta); + assertEquals(EncodingType.DENSE, hll.getEncoding()); + + // invalid merge -- register set size doesn't match + hll.merge(hll4); + } + + @Test + public void testHLLSparseMoreRegisterBits() { + HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE) + .setNumRegisterIndexBits(16).build(); + int size = 1000; + for (int i = 0; i < size; i++) { + hll.addLong(i); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + } +} diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogDense.java b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogDense.java new file mode 100644 index 0000000000..00fd785b6f --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogDense.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.ndv.hll; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(value = Parameterized.class) +public class TestHyperLogLogDense { + + // 5% tolerance for long range bias and 3% for short range bias + private float longRangeTolerance = 5.0f; + private float shortRangeTolerance = 3.0f; + + private int size; + + public TestHyperLogLogDense(int n) { + this.size = n; + } + + @Parameters + public static Collection data() { + Object[][] data = new Object[][] { { 2 }, { 10 }, { 100 }, { 1000 }, { 10000 }, { 100000 }, + { 1000000 } }; + return Arrays.asList(data); + } + + @Test + public void testHLLAdd() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().setEncoding(HyperLogLog.EncodingType.DENSE).build(); + int size = 100; + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + } + + @Test + public void testHLLAddHalfDistinct() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().setEncoding(HyperLogLog.EncodingType.DENSE).build(); + int unique = size / 2; + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + long val = rand.nextInt(unique); + hashset.add(val); + hll.addLong(val); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + assertEquals((double) hashset.size(), (double) hll.count(), delta); + } + +} diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogSparse.java b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogSparse.java new file mode 100644 index 0000000000..cfa58868e5 --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogSparse.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.common.ndv.hll; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(value = Parameterized.class) +public class TestHyperLogLogSparse { + + // 5% tolerance for long range bias and 1% for short range bias + private float longRangeTolerance = 5.0f; + private float shortRangeTolerance = 1.0f; + + private int size; + + public TestHyperLogLogSparse(int n) { + this.size = n; + } + + @Parameters + public static Collection data() { + Object[][] data = new Object[][] { { 2 }, { 10 }, { 100 }, { 1000 }, { 10000 }, { 100000 }, + { 1000000 } }; + return Arrays.asList(data); + } + + @Test + public void testHLLAdd() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().build(); + int size = 100; + for (int i = 0; i < size; i++) { + hll.addLong(rand.nextLong()); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * size / 100; + assertEquals((double) size, (double) hll.count(), delta); + } + + @Test + public void testHLLAddHalfDistinct() { + Random rand = new Random(size); + HyperLogLog hll = HyperLogLog.builder().build(); + int unique = size / 2; + Set hashset = new HashSet(); + for (int i = 0; i < size; i++) { + long val = rand.nextInt(unique); + hashset.add(val); + hll.addLong(val); + } + double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance; + double delta = threshold * hashset.size() / 100; + assertEquals((double) hashset.size(), (double) hll.count(), delta); + } +} diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestSparseEncodeHash.java b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestSparseEncodeHash.java new file mode 100644 index 0000000000..2c7e89b5e6 --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestSparseEncodeHash.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.common.ndv.hll; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(value = Parameterized.class) +public class TestSparseEncodeHash { + + private long input; + private int expected; + + public TestSparseEncodeHash(long i, int e) { + this.input = i; + this.expected = e; + } + + @Parameters + public static Collection data() { + Object[][] data = new Object[][] { { 11111111111L, 373692871 }, + { 4314495982023L, -1711269433 }, { 4314529536455L, -1744823865 }, + { 4314563074503L, 268425671 }, { 17257983908295L, -1644160569 }, { 536861127L, 536861127 }, + { 536844743L, 536844743 }, { 144115188075862471L, -671082041 } }; + return Arrays.asList(data); + } + + @Test + public void testEncodeHash() { + HLLSparseRegister reg = new HLLSparseRegister(14, 25, 6); + int got = reg.encodeHash(input); + assertEquals(expected, got); + } +} diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java deleted file mode 100644 index 92f9a845e3..0000000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java +++ /dev/null @@ -1,367 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore; -import java.util.Random; - -import javolution.util.FastBitSet; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.io.Text; - -/* - * https://en.wikipedia.org/wiki/Flajolet%E2%80%93Martin_algorithm - * We implement Flajolet–Martin algorithm in this class. - * The Flajolet–Martin algorithm is an algorithm for approximating the number of distinct elements - * in a stream with a single pass and space-consumption which is logarithmic in the maximum number - * of possible distinct elements in the stream. The algorithm was introduced by Philippe Flajolet - * and G. Nigel Martin in their 1984 paper "Probabilistic Counting Algorithms for Data Base Applications". - * Later it has been refined in the papers "LogLog counting of large cardinalities" by Marianne Durand - * and Philippe Flajolet, and "HyperLogLog: The analysis of a near-optimal cardinality estimation - * algorithm" by Philippe Flajolet et al. - */ - -/* - * The algorithm works like this. - * (1) Set the number of bit vectors, i.e., numBitVectors, based on the precision. 
- * (2) For each bit vector, generate hash value of the long value and mod it by 2^bitVectorSize-1. (addToEstimator) - * (3) Set the index (addToEstimator) - * (4) Take the average of the index for all the bit vectors and get the estimated NDV (estimateNumDistinctValues). - */ -public class NumDistinctValueEstimator { - - static final Log LOG = LogFactory.getLog(NumDistinctValueEstimator.class.getName()); - - /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. - * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. - * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise - * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 - * thus introducing errors in the estimates. - */ - private static final int BIT_VECTOR_SIZE = 31; - private final int numBitVectors; - - // Refer to Flajolet-Martin'86 for the value of phi - private static final double PHI = 0.77351; - - private final int[] a; - private final int[] b; - private final FastBitSet[] bitVector; - - private final Random aValue; - private final Random bValue; - - /* Create a new distinctValueEstimator - */ - public NumDistinctValueEstimator(int numBitVectors) { - this.numBitVectors = numBitVectors; - bitVector = new FastBitSet[numBitVectors]; - for (int i=0; i< numBitVectors; i++) { - bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); - } - - a = new int[numBitVectors]; - b = new int[numBitVectors]; - - /* Use a large prime number as a seed to the random number generator. - * Java's random number generator uses the Linear Congruential Generator to generate random - * numbers using the following recurrence relation, - * - * X(n+1) = (a X(n) + c ) mod m - * - * where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48 - * is not a prime number and hence the set of numbers from 0 to m don't form a finite field. - * If these numbers don't come from a finite field any give X(n) and X(n+1) may not be pair - * wise independent. - * - * However, empirically passing in prime numbers as seeds seems to work better than when passing - * composite numbers as seeds. Ideally Java's Random should pick m such that m is prime. - * - */ - aValue = new Random(99397); - bValue = new Random(9876413); - - for (int i = 0; i < numBitVectors; i++) { - int randVal; - /* a and b shouldn't be even; If a and b are even, then none of the values - * will set bit 0 thus introducing errors in the estimate. Both a and b can be even - * 25% of the times and as a result 25% of the bit vectors could be inaccurate. To avoid this - * always pick odd values for a and b. 
- */ - do { - randVal = aValue.nextInt(); - } while (randVal % 2 == 0); - - a[i] = randVal; - - do { - randVal = bValue.nextInt(); - } while (randVal % 2 == 0); - - b[i] = randVal; - - if (a[i] < 0) { - a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1); - } - - if (b[i] < 0) { - b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1); - } - } - } - - public NumDistinctValueEstimator(String s, int numBitVectors) { - this.numBitVectors = numBitVectors; - FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors); - bitVector = new FastBitSet[numBitVectors]; - for(int i=0; i = '0' && c <= '9') { - String t = new String(); - t = t + c; - c = s.charAt(i); - i = i + 1; - - while (c != ',' && c!= '}') { - t = t + c; - c = s.charAt(i); - i = i + 1; - } - - int bitIndex = Integer.parseInt(t); - assert(bitIndex >= 0); - assert(vectorIndex < numBitVectors); - b[vectorIndex].set(bitIndex); - if (c == '}') { - vectorIndex = vectorIndex + 1; - } - } - } - return b; - } - - private int generateHash(long v, int hashNum) { - int mod = (1<> 1; - } - - // Set bitvector[index] := 1 - bitVector[i].set(index); - } - } - - public void addToEstimatorPCSA(long v) { - int hash = generateHashForPCSA(v); - int rho = hash/numBitVectors; - int index; - - // Find the index of the least significant bit that is 1 - for (index=0; index> 1; - } - - // Set bitvector[index] := 1 - bitVector[hash%numBitVectors].set(index); - } - - public void addToEstimator(double d) { - int v = new Double(d).hashCode(); - addToEstimator(v); - } - - public void addToEstimatorPCSA(double d) { - int v = new Double(d).hashCode(); - addToEstimatorPCSA(v); - } - - public void addToEstimator(HiveDecimal decimal) { - int v = decimal.hashCode(); - addToEstimator(v); - } - - public void addToEstimatorPCSA(HiveDecimal decimal) { - int v = decimal.hashCode(); - addToEstimatorPCSA(v); - } - - public void mergeEstimators(NumDistinctValueEstimator o) { - // Bitwise OR the bitvector with the bitvector in the agg buffer - for (int i=0; i() { @Override public AggrStats load(StatsCacheKey key) throws Exception { - int numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); HBaseReadWrite hrw = HBaseReadWrite.getInstance(); AggrStats aggrStats = hrw.getAggregatedStats(key.hashed); @@ -101,7 +100,7 @@ public AggrStats load(StatsCacheKey key) throws Exception { if (aggregator == null) { aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css.iterator() .next().getStatsObj().iterator().next().getStatsData().getSetField(), - numBitVectors, useDensityFunctionForNDVEstimation); + useDensityFunctionForNDVEstimation); } ColumnStatisticsObj statsObj = aggregator .aggregate(key.colName, key.partNames, css); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java index 31955b4363..29a05390bf 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.metastore.api.MetaException; public abstract class ColumnStatsAggregator { - public int numBitVectors; public boolean useDensityFunctionForNDVEstimation; public abstract ColumnStatisticsObj aggregate(String colName, List partNames, diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java index daf85692eb..568bf0609b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java @@ -34,7 +34,7 @@ private ColumnStatsAggregatorFactory() { } - public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, int numBitVectors, boolean useDensityFunctionForNDVEstimation) { + public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boolean useDensityFunctionForNDVEstimation) { ColumnStatsAggregator agg; switch (type) { case BOOLEAN_STATS: @@ -58,7 +58,6 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, int n default: throw new RuntimeException("Woh, bad. Unknown stats type " + type.toString()); } - agg.numBitVectors = numBitVectors; agg.useDensityFunctionForNDVEstimation = useDensityFunctionForNDVEstimation; return agg; } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java index 36b2c9c56b..8eb64e0143 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java @@ -26,7 +26,8 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -46,7 +47,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); - boolean isNDVBitVectorSet = true; + NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { if (cs.getStatsObjSize() != 1) { @@ -60,22 +61,36 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso .getStatsData().getSetField()); } - if (numBitVectors <= 0 || !cso.getStatsData().getDecimalStats().isSetBitVectors() + if (!cso.getStatsData().getDecimalStats().isSetBitVectors() || cso.getStatsData().getDecimalStats().getBitVectors().length() == 0) { - isNDVBitVectorSet = false; + ndvEstimator = null; break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(cso.getStatsData().getDecimalStats().getBitVectors()); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } } } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); + } ColumnStatisticsData columnStatisticsData = new 
ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { DecimalColumnStatsData aggregateData = null; long lowerBound = 0; long higherBound = 0; double densityAvgSum = 0.0; - NumDistinctValueEstimator ndvEstimator = null; - if (isNDVBitVectorSet) { - ndvEstimator = new NumDistinctValueEstimator(numBitVectors); - } for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats(); @@ -85,9 +100,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, densityAvgSum += (HBaseUtils.getDoubleValue(newData.getHighValue()) - HBaseUtils .getDoubleValue(newData.getLowValue())) / newData.getNumDVs(); } - if (isNDVBitVectorSet) { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + if (ndvEstimator != null) { + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -108,7 +123,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } - if (isNDVBitVectorSet) { + if (ndvEstimator != null) { // if all the ColumnStatisticsObjs contain bitvectors, we do not need to // use uniform distribution assumption because we can merge bitvectors // to get a good estimation. @@ -145,7 +160,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // while we scan the css, we also get the densityAvg, lowerbound and // higerbound when useDensityFunctionForNDVEstimation is true. double densityAvgSum = 0.0; - if (!isNDVBitVectorSet) { + if (ndvEstimator == null) { // if not every partition uses bitvector for ndv, we just fall back to // the traditional extrapolation methods. for (ColumnStatistics cs : css) { @@ -162,7 +177,6 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } else { // we first merge all the adjacent bitvectors that we could merge and // derive new partition names and index. 
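Each of the rewritten aggregators (decimal here, double and long below) opens with the same probe: deserialize every partition's bit vectors through the factory, give up on sketch merging if any partition lacks vectors or holds an incompatible sketch, then reset to an empty estimator of the same type before accumulating. A condensed sketch of that pattern follows; the wrapper class and the serializedVectors input are illustrative stand-ins for the per-partition stats scan, not part of the patch.

  import java.util.List;
  import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
  import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;

  public class NdvMergeProbe {
    /** Returns an empty estimator compatible with every partition's sketch,
     *  or null when the caller must fall back to extrapolation. */
    static NumDistinctValueEstimator probe(List<String> serializedVectors) {
      NumDistinctValueEstimator ndvEstimator = null;
      for (String bitVectors : serializedVectors) {
        if (bitVectors == null || bitVectors.length() == 0) {
          return null; // a partition without vectors: no sketch merging
        }
        NumDistinctValueEstimator estimator =
            NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(bitVectors);
        if (ndvEstimator == null) {
          ndvEstimator = estimator;
        } else if (!ndvEstimator.canMerge(estimator)) {
          return null; // e.g. FMSketch mixed with HyperLogLog
        }
      }
      // discard the probed contents: merging starts from an empty sketch
      return ndvEstimator == null ? null
          : NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator);
    }
  }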
- NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); StringBuilder pseudoPartName = new StringBuilder(); double pseudoIndexSum = 0; int length = 0; @@ -191,6 +205,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, pseudoPartName = new StringBuilder(); pseudoIndexSum = 0; length = 0; + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); } aggregateData = null; } @@ -216,8 +231,8 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); } if (length > 0) { // we have to set ndv diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java index a88ef84e5c..b6b86123b2 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java @@ -26,7 +26,8 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -44,7 +45,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); - boolean isNDVBitVectorSet = true; + NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { if (cs.getStatsObjSize() != 1) { @@ -58,22 +59,36 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso .getStatsData().getSetField()); } - if (numBitVectors <= 0 || !cso.getStatsData().getDoubleStats().isSetBitVectors() + if (!cso.getStatsData().getDoubleStats().isSetBitVectors() || cso.getStatsData().getDoubleStats().getBitVectors().length() == 0) { - isNDVBitVectorSet = false; + ndvEstimator = null; break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(cso.getStatsData().getDoubleStats().getBitVectors()); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } } } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); + } ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { DoubleColumnStatsData aggregateData = null; long lowerBound = 0; long higherBound = 0; double densityAvgSum = 0.0; - NumDistinctValueEstimator 
ndvEstimator = null; - if (isNDVBitVectorSet) { - ndvEstimator = new NumDistinctValueEstimator(numBitVectors); - } for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); @@ -82,9 +97,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, higherBound += newData.getNumDVs(); densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); } - if (isNDVBitVectorSet) { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + if (ndvEstimator != null) { + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -96,7 +111,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } - if (isNDVBitVectorSet) { + if (ndvEstimator != null) { // if all the ColumnStatisticsObjs contain bitvectors, we do not need to // use uniform distribution assumption because we can merge bitvectors // to get a good estimation. @@ -132,7 +147,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // while we scan the css, we also get the densityAvg, lowerbound and // higerbound when useDensityFunctionForNDVEstimation is true. double densityAvgSum = 0.0; - if (!isNDVBitVectorSet) { + if (ndvEstimator == null) { // if not every partition uses bitvector for ndv, we just fall back to // the traditional extrapolation methods. for (ColumnStatistics cs : css) { @@ -148,7 +163,6 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } else { // we first merge all the adjacent bitvectors that we could merge and // derive new partition names and index. 
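Once the probe succeeds, aggregation reduces to folding each partition's serialized vectors into the shared estimator and reading one estimate from the merged sketch, instead of assuming a uniform distribution across partitions. A minimal sketch for the double case; the method wrapper is illustrative, while css and the accessors are as in the surrounding hunks.

  import java.util.List;
  import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
  import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
  import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
  import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;

  class MergedNdvSketch {
    static long mergedNdv(NumDistinctValueEstimator ndvEstimator, List<ColumnStatistics> css) {
      for (ColumnStatistics cs : css) {
        DoubleColumnStatsData newData =
            cs.getStatsObjIterator().next().getStatsData().getDoubleStats();
        // fold this partition's serialized sketch into the running estimator
        ndvEstimator.mergeEstimators(
            NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(newData.getBitVectors()));
      }
      return ndvEstimator.estimateNumDistinctValues();
    }
  }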
- NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); StringBuilder pseudoPartName = new StringBuilder(); double pseudoIndexSum = 0; int length = 0; @@ -176,6 +190,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, pseudoPartName = new StringBuilder(); pseudoIndexSum = 0; length = 0; + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); } aggregateData = null; } @@ -192,8 +207,8 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); } if (length > 0) { // we have to set ndv diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java index 8ac6561aec..2da6f60167 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java @@ -26,7 +26,8 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -44,7 +45,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); - boolean isNDVBitVectorSet = true; + NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { if (cs.getStatsObjSize() != 1) { @@ -58,22 +59,36 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso .getStatsData().getSetField()); } - if (numBitVectors <= 0 || !cso.getStatsData().getLongStats().isSetBitVectors() + if (!cso.getStatsData().getLongStats().isSetBitVectors() || cso.getStatsData().getLongStats().getBitVectors().length() == 0) { - isNDVBitVectorSet = false; + ndvEstimator = null; break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(cso.getStatsData().getLongStats().getBitVectors()); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } } } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); + } ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { LongColumnStatsData aggregateData = null; long lowerBound = 0; long higherBound = 0; double densityAvgSum = 0.0; - NumDistinctValueEstimator 
ndvEstimator = null; - if (isNDVBitVectorSet) { - ndvEstimator = new NumDistinctValueEstimator(numBitVectors); - } for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); LongColumnStatsData newData = cso.getStatsData().getLongStats(); @@ -82,9 +97,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, higherBound += newData.getNumDVs(); densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); } - if (isNDVBitVectorSet) { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + if (ndvEstimator != null) { + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -96,7 +111,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } - if (isNDVBitVectorSet) { + if (ndvEstimator != null) { // if all the ColumnStatisticsObjs contain bitvectors, we do not need to // use uniform distribution assumption because we can merge bitvectors // to get a good estimation. @@ -132,7 +147,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // while we scan the css, we also get the densityAvg, lowerbound and // higerbound when useDensityFunctionForNDVEstimation is true. double densityAvgSum = 0.0; - if (!isNDVBitVectorSet) { + if (ndvEstimator == null) { // if not every partition uses bitvector for ndv, we just fall back to // the traditional extrapolation methods. for (ColumnStatistics cs : css) { @@ -148,7 +163,6 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } else { // we first merge all the adjacent bitvectors that we could merge and // derive new partition names and index. 
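The densityAvgSum accumulated in these scans backs the alternative path taken when useDensityFunctionForNDVEstimation is on: estimate NDV as the overall value range divided by the average per-partition density. The final arithmetic is not visible in this hunk, so the following is only a sketch, under the assumption that the estimate is clamped by the lowerBound/higherBound bookkeeping kept alongside densityAvgSum.

  class DensityNdvSketch {
    // densityAvgSum accumulated (highValue - lowValue) / numDVs per partition
    static long densityBasedNdv(double densityAvgSum, int numPartitions,
        long globalLow, long globalHigh, long lowerBound, long higherBound) {
      double densityAvg = densityAvgSum / numPartitions;
      long estimate = (long) ((globalHigh - globalLow) / densityAvg);
      // assumed clamp: at least the largest single partition's NDV,
      // at most the sum of the per-partition NDVs
      return Math.min(Math.max(estimate, lowerBound), higherBound);
    }
  }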
- NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); StringBuilder pseudoPartName = new StringBuilder(); double pseudoIndexSum = 0; int length = 0; @@ -176,6 +190,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, pseudoPartName = new StringBuilder(); pseudoIndexSum = 0; length = 0; + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); } aggregateData = null; } @@ -192,8 +207,8 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); } if (length > 0) { // we have to set ndv diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java index 2aa4046a46..83c6c54fd2 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java @@ -21,7 +21,8 @@ import java.util.List; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -39,7 +40,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // bitvectors. Only when both of the conditions are true, we merge bit // vectors. Otherwise, just use the maximum function. 
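The merge-or-maximum rule stated in this comment is implemented two-sided in the ColumnStatsMerger rewrites further down: deserialize both sides' vectors, merge the sketches when canMerge allows it and persist the merged serialization, otherwise keep the larger NDV. In sketch form for the long case; the helper method is illustrative, and the calls mirror the merger hunks below.

  import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
  import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
  import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

  class NdvMergeOrMax {
    static void mergeNdv(LongColumnStatsData aggregateData, LongColumnStatsData newData) {
      if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0
          || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
        // one side has no sketch: fall back to the maximum of the two NDVs
        aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
        return;
      }
      NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory
          .getNumDistinctValueEstimator(aggregateData.getBitVectors());
      NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory
          .getNumDistinctValueEstimator(newData.getBitVectors());
      long ndv;
      if (oldEst.canMerge(newEst)) {
        oldEst.mergeEstimators(newEst);
        ndv = oldEst.estimateNumDistinctValues();
        aggregateData.setBitVectors(oldEst.serialize()); // persist the merged sketch
      } else {
        ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
      }
      aggregateData.setNumDVs(ndv);
    }
  }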
boolean doAllPartitionContainStats = partNames.size() == css.size(); - boolean isNDVBitVectorSet = true; + NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { if (cs.getStatsObjSize() != 1) { @@ -53,21 +54,37 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso .getStatsData().getSetField()); } - if (numBitVectors <= 0 || !cso.getStatsData().getStringStats().isSetBitVectors() + if (!cso.getStatsData().getStringStats().isSetBitVectors() || cso.getStatsData().getStringStats().getBitVectors().length() == 0) { - isNDVBitVectorSet = false; + ndvEstimator = null; break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors()); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } } } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); + } ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats && isNDVBitVectorSet) { + if (doAllPartitionContainStats && ndvEstimator!=null) { StringColumnStatsData aggregateData = null; - NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); StringColumnStatsData newData = cso.getStatsData().getStringStats(); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java index 33c7e3e52c..d3051a2b00 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,8 +26,6 @@ public abstract class ColumnStatsMerger { protected final Logger LOG = LoggerFactory.getLogger(ColumnStatsMerger.class.getName()); - NumDistinctValueEstimator ndvEstimator = null; - public abstract void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java index fe890e4e27..c013ba5c5d 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java @@ -20,7 +20,8 @@ package 
org.apache.hadoop.hive.metastore.hbase.stats.merge; import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -37,15 +38,6 @@ private ColumnStatsMergerFactory() { } - // we depend on the toString() method for javolution.util.FastCollection. - private static int countNumBitVectors(String s) { - if (s != null) { - return StringUtils.countMatches(s, "{"); - } else { - return 0; - } - } - public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew, ColumnStatisticsObj statsObjOld) { ColumnStatsMerger agg; @@ -53,30 +45,20 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsOb _Fields typeOld = statsObjOld.getStatsData().getSetField(); // make sure that they have the same type typeNew = typeNew == typeOld ? typeNew : null; - int numBitVectors = 0; switch (typeNew) { case BOOLEAN_STATS: agg = new BooleanColumnStatsMerger(); break; case LONG_STATS: { agg = new LongColumnStatsMerger(); - int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getLongStats().getBitVectors()); - int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getLongStats().getBitVectors()); - numBitVectors = nbvNew == nbvOld ? nbvNew : 0; break; } case DOUBLE_STATS: { agg = new DoubleColumnStatsMerger(); - int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getDoubleStats().getBitVectors()); - int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getDoubleStats().getBitVectors()); - numBitVectors = nbvNew == nbvOld ? nbvNew : 0; break; } case STRING_STATS: { agg = new StringColumnStatsMerger(); - int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getStringStats().getBitVectors()); - int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getStringStats().getBitVectors()); - numBitVectors = nbvNew == nbvOld ? nbvNew : 0; break; } case BINARY_STATS: @@ -84,24 +66,15 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsOb break; case DECIMAL_STATS: { agg = new DecimalColumnStatsMerger(); - int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getDecimalStats().getBitVectors()); - int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getDecimalStats().getBitVectors()); - numBitVectors = nbvNew == nbvOld ? nbvNew : 0; break; } case DATE_STATS: { agg = new DateColumnStatsMerger(); - int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getDateStats().getBitVectors()); - int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getDateStats().getBitVectors()); - numBitVectors = nbvNew == nbvOld ? 
nbvNew : 0; break; } default: throw new IllegalArgumentException("Unknown stats type " + typeNew.toString()); } - if (numBitVectors > 0) { - agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors); - } return agg; } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java index 3179b23438..e899bfe85f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java @@ -19,7 +19,8 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; @@ -29,27 +30,32 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { DateColumnStatsData aggregateData = aggregateColStats.getStatsData().getDateStats(); DateColumnStatsData newData = newColStats.getStatsData().getDateStats(); - Date lowValue = - aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData - .getLowValue() : newData.getLowValue(); + Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData + .getLowValue() : newData.getLowValue(); aggregateData.setLowValue(lowValue); - Date highValue = - aggregateData.getHighValue().compareTo(newData.getHighValue()) >= 0 ? aggregateData - .getHighValue() : newData.getHighValue(); + Date highValue = aggregateData.getHighValue().compareTo(newData.getHighValue()) >= 0 ? 
aggregateData + .getHighValue() : newData.getHighValue(); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - long ndv = ndvEstimator.estimateNumDistinctValues(); + NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(aggregateData.getBitVectors()); + NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors()); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setBitVectors(oldEst.serialize()); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java index c13add9d9c..4099ffcace 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java @@ -19,7 +19,8 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; @@ -38,18 +39,25 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new .getHighValue() : newData.getHighValue(); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - long ndv = ndvEstimator.estimateNumDistinctValues(); + NumDistinctValueEstimator oldEst = 
NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(aggregateData.getBitVectors()); + NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors()); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setBitVectors(oldEst.serialize()); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java index fbdba24b0a..1691fc97df 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java @@ -19,7 +19,8 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; @@ -31,18 +32,25 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - long ndv = ndvEstimator.estimateNumDistinctValues(); + NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(aggregateData.getBitVectors()); + NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors()); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setBitVectors(oldEst.serialize()); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); } } } diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java index ac65590505..361af350fe 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java @@ -19,7 +19,8 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; @@ -31,18 +32,25 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - long ndv = ndvEstimator.estimateNumDistinctValues(); + NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(aggregateData.getBitVectors()); + NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors()); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setBitVectors(oldEst.serialize()); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java index 41587477d3..8e28f907ee 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java @@ -19,10 +19,10 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import 
org.apache.hadoop.hive.metastore.api.StringColumnStatsData; -import org.apache.parquet.Log; public class StringColumnStatsMerger extends ColumnStatsMerger { @Override @@ -32,18 +32,25 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - long ndv = ndvEstimator.estimateNumDistinctValues(); + NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(aggregateData.getBitVectors()); + NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors()); + long ndv = -1; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setBitVectors(oldEst.serialize()); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); } } } diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java index 87b1ac870d..74e16695a9 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java @@ -60,7 +60,7 @@ private HBaseStore store; SortedMap rows = new TreeMap<>(); - // NDV will be 3 for bitVectors[0] and 12 for bitVectors[1] + // NDV will be 3 for bitVectors[0] and 1 for bitVectors[1] String bitVectors[] = { "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}", "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" }; @@ -278,7 +278,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); Assert.assertEquals(45, lcsd.getNumNulls()); - Assert.assertEquals(12, lcsd.getNumDVs()); + Assert.assertEquals(3, lcsd.getNumDVs()); } }; List partNames = new ArrayList<>(); @@ -422,7 +422,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); Assert.assertEquals(40, lcsd.getNumNulls()); - Assert.assertEquals(12, 
lcsd.getNumDVs()); + Assert.assertEquals(3, lcsd.getNumDVs()); } }; List partNames = new ArrayList<>(); @@ -494,7 +494,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { Assert.assertEquals(1010, HBaseUtils.getDoubleValue(lcsd.getHighValue()), 0.01); Assert.assertEquals(-1010, HBaseUtils.getDoubleValue(lcsd.getLowValue()), 0.01); Assert.assertEquals(40, lcsd.getNumNulls()); - Assert.assertEquals(12, lcsd.getNumDVs()); + Assert.assertEquals(3, lcsd.getNumDVs()); } }; List partNames = new ArrayList<>(); @@ -566,7 +566,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); Assert.assertEquals(40, lcsd.getNumNulls()); - Assert.assertEquals(12, lcsd.getNumDVs()); + Assert.assertEquals(3, lcsd.getNumDVs()); } }; List partNames = new ArrayList<>(); diff --git a/ql/pom.xml b/ql/pom.xml index 5732965e47..e17fe50b94 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -364,6 +364,11 @@ ${datanucleus-core.version} + javolution + javolution + ${javolution.version} + + org.apache.calcite calcite-core ${calcite.version} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 0a5cf00c44..1923a9b516 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -27,6 +27,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; @@ -37,8 +38,6 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.session.OperationLog; -import org.apache.hadoop.hive.ql.session.OperationLog.LoggingLevel; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde.serdeConstants; @@ -246,7 +245,7 @@ private String escapeBackTicks(String colName) { return colName.replaceAll("`", "``"); } - private String genRewrittenQuery(List colNames, int numBitVectors, Map partSpec, + private String genRewrittenQuery(List colNames, HiveConf conf, Map partSpec, boolean isPartitionStats) throws SemanticException{ StringBuilder rewrittenQueryBuilder = new StringBuilder("select "); String rewrittenQuery; @@ -255,11 +254,20 @@ private String genRewrittenQuery(List colNames, int numBitVectors, Map 0) { rewrittenQueryBuilder.append(" , "); } + String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase(); rewrittenQueryBuilder.append("compute_stats(`"); rewrittenQueryBuilder.append(escapeBackTicks(colNames.get(i))); - rewrittenQueryBuilder.append("` , "); - rewrittenQueryBuilder.append(numBitVectors); - rewrittenQueryBuilder.append(" )"); + rewrittenQueryBuilder.append("`, '" + func + "'"); + if (func.equals("fm")) { + int numBitVectors = 0; + try { + numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); + } catch (Exception e) { + throw new SemanticException(e.getMessage()); + } + rewrittenQueryBuilder.append(", " + numBitVectors); + } + 
rewrittenQueryBuilder.append(")"); } if (isPartitionStats) { @@ -377,13 +385,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { isTableLevel = true; } colType = getColumnTypes(colNames); - int numBitVectors; - try { - numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); - } catch (Exception e) { - throw new SemanticException(e.getMessage()); - } - rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats); + rewrittenQuery = genRewrittenQuery(colNames, conf, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); } else { // Not an analyze table column compute statistics statement - don't do any rewrites @@ -447,13 +449,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) isTableLevel = true; } colType = getColumnTypes(colNames); - int numBitVectors = 0; - try { - numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); - } catch (Exception e) { - throw new SemanticException(e.getMessage()); - } - rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats); + rewrittenQuery = genRewrittenQuery(colNames, conf, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); context.analyzeRewrite = new AnalyzeRewriteContext(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 76f7daeb1b..3b9ab41bed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1640,60 +1640,6 @@ public static long safeMult(long a, long b) { } } - public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException { - int numBitVectors; - float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR); - - if (percentageError < 0.0) { - throw new SemanticException("hive.stats.ndv.error can't be negative"); - } else if (percentageError <= 2.4) { - numBitVectors = 1024; - LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%"); - LOG.info("Choosing 1024 bit vectors.."); - } else if (percentageError <= 3.4 ) { - numBitVectors = 1024; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 1024 bit vectors.."); - } else if (percentageError <= 4.8) { - numBitVectors = 512; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 512 bit vectors.."); - } else if (percentageError <= 6.8) { - numBitVectors = 256; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 256 bit vectors.."); - } else if (percentageError <= 9.7) { - numBitVectors = 128; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 128 bit vectors.."); - } else if (percentageError <= 13.8) { - numBitVectors = 64; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 64 bit vectors.."); - } else if (percentageError <= 19.6) { - numBitVectors = 32; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 32 bit vectors.."); - } else if (percentageError <= 28.2) { - numBitVectors = 16; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 16 bit vectors.."); - } else if (percentageError <= 40.9) { - numBitVectors = 8; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 8 bit vectors.."); - } else if (percentageError 
<= 61.0) { - numBitVectors = 4; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 4 bit vectors.."); - } else { - numBitVectors = 2; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 2 bit vectors.."); - } - return numBitVectors; - } - public static boolean hasDiscreteRange(ColStatistics colStat) { if (colStat.getRange() != null) { TypeInfo colType = TypeInfoUtils.getTypeInfoFromTypeString(colStat.getColumnType()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DoubleNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DoubleNumDistinctValueEstimator.java deleted file mode 100644 index e76fc74dbc..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DoubleNumDistinctValueEstimator.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -public class DoubleNumDistinctValueEstimator extends NumDistinctValueEstimator { - - public DoubleNumDistinctValueEstimator(int numBitVectors) { - super(numBitVectors); - } - - public DoubleNumDistinctValueEstimator(String s, int numVectors) { - super(s, numVectors); - } - - public void addToEstimator(double d) { - int v = new Double(d).hashCode(); - super.addToEstimator(v); - } - - public void addToEstimatorPCSA(double d) { - int v = new Double(d).hashCode(); - super.addToEstimatorPCSA(v); - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 2ebfcb2360..2d56950cb1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -22,6 +22,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.FMSketch; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; @@ -53,13 +58,13 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver { static final Logger LOG = LoggerFactory.getLogger(GenericUDAFComputeStats.class.getName()); - + @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - if (parameters.length != 2 ) { + if (parameters.length < 2 ) { throw new 
UDFArgumentTypeException(parameters.length - 1, - "Exactly two arguments are expected."); + "Exactly 2 (col + hll) or 3 (col + fm + #bitvectors) arguments are expected."); } if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { @@ -235,23 +240,12 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep if (!emptyTable) { if (p == null) { myagg.countNulls++; - } - else { - try { - boolean v = PrimitiveObjectInspectorUtils.getBoolean(p, inputOI); - if (v == false) { - myagg.countFalses++; - } else if (v == true){ - myagg.countTrues++; - } - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " - + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() - + " ignoring similar exceptions."); - } + } else { + boolean v = PrimitiveObjectInspectorUtils.getBoolean(p, inputOI); + if (v == false) { + myagg.countFalses++; + } else if (v == true) { + myagg.countTrues++; } } } @@ -302,6 +296,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { /* Object Inspector corresponding to the input parameter. */ protected transient PrimitiveObjectInspector inputOI; + protected transient PrimitiveObjectInspector funcOI; protected transient PrimitiveObjectInspector numVectorsOI; @@ -322,9 +317,6 @@ public Object terminate(AggregationBuffer agg) throws HiveException { protected transient StructField ndvField; protected transient StringObjectInspector ndvFieldOI; - protected transient StructField numBitVectorsField; - protected transient IntObjectInspector numBitVectorsFieldOI; - /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". */ @@ -334,8 +326,6 @@ public Object terminate(AggregationBuffer agg) throws HiveException { */ protected transient Object[] result; - protected transient boolean warned; - protected abstract OI getValueObjectInspector(); protected abstract OI getValueObjectInspector(PrimitiveTypeInfo typeInfo); @@ -347,7 +337,10 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc // initialize input if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { inputOI = (PrimitiveObjectInspector) parameters[0]; - numVectorsOI = (PrimitiveObjectInspector) parameters[1]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } } else { soi = (StructObjectInspector) parameters[0]; @@ -363,9 +356,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc ndvField = soi.getStructFieldRef("bitvector"); ndvFieldOI = (StringObjectInspector) ndvField.getFieldObjectInspector(); - numBitVectorsField = soi.getStructFieldRef("numbitvectors"); - numBitVectorsFieldOI = (IntObjectInspector) - numBitVectorsField.getFieldObjectInspector(); } // initialize output @@ -376,7 +366,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(getValueObjectInspector(inputOI.getTypeInfo())); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); List fname = new ArrayList(); fname.add("columnType"); @@ -384,13 +373,11 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc fname.add("max"); fname.add("countnulls"); fname.add("bitvector"); - 
fname.add("numbitvectors"); partialResult = new Object[6]; partialResult[0] = new Text(); partialResult[3] = new LongWritable(0); partialResult[4] = new Text(); - partialResult[5] = new IntWritable(0); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -436,12 +423,13 @@ public int estimate() { return (int) (model.lengthFor(columnType) + model.primitive1() + model.primitive2() - + ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) : + + ((numDV == null) ? lengthFor(model, null) : numDV.lengthFor(model))); } - protected void initNDVEstimator(int numBitVectors) { - numDV = new NumDistinctValueEstimator(numBitVectors); + protected void initNDVEstimator(String func, int numBitVectors) { + numDV = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(func, + numBitVectors); } protected abstract void update(Object p, PrimitiveObjectInspector inputOI); @@ -457,7 +445,6 @@ protected Object serialize(Object[] result) { if (numDV != null) { ((Text) result[5]).set(numDV.serialize()); } - return result; } @@ -465,11 +452,10 @@ protected Object serializePartial(Object[] result) { // Serialize the rest of the values in the AggBuffer serializeCommon(result); - // Serialize numDistinctValue Estimator - Text t = numDV.serialize(); - ((Text) result[4]).set(t); - ((IntWritable) result[5]).set(numDV.getnumBitVectors()); - + if (numDV != null) { + // Serialize numDistinctValue Estimator + ((Text) result[4]).set(numDV.serialize()); + } return result; } @@ -495,30 +481,29 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep NumericStatsAgg myagg = (NumericStatsAgg) agg; if (myagg.numDV == null) { - int numVectors = parameters[1] == null ? 0 : - PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); - if (numVectors > MAX_BIT_VECTORS) { - throw new HiveException("The maximum allowed value for number of bit vectors " + - " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + String func = null; + int numVectors = 0; + // func may be null when GBY op is closing. + // see mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainuser_3.q + // original behavior is to create FMSketch + func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString( + parameters[1], funcOI); + if (parameters.length == 3) { + numVectors = parameters[2] == null ? 
0 : PrimitiveObjectInspectorUtils.getInt( + parameters[2], numVectorsOI); + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + " is " + + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + } } - myagg.initNDVEstimator(numVectors); + myagg.initNDVEstimator(func, numVectors); } - //Update null counter if a null value is seen + // Update null counter if a null value is seen if (parameters[0] == null) { myagg.countNulls++; } else { - try { - myagg.update(parameters[0], inputOI); - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " - + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() - + " ignoring similar exceptions."); - } - } + myagg.update(parameters[0], inputOI); } } @@ -537,15 +522,6 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { if (partial != null) { NumericStatsAgg myagg = (NumericStatsAgg) agg; - if (myagg.numDV == null) { - Object partialValue = soi.getStructFieldData(partial, numBitVectorsField); - int numVectors = numBitVectorsFieldOI.get(partialValue); - if (numVectors <= 0) { - return; - } - myagg.initNDVEstimator(numVectors); - } - // Update min if min is lesser than the smallest value seen so far Object minValue = soi.getStructFieldData(partial, minField); myagg.updateMin(minValue, minFieldOI); @@ -561,9 +537,15 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { // Merge numDistinctValue Estimators Object numDistinct = soi.getStructFieldData(partial, ndvField); String v = ndvFieldOI.getPrimitiveJavaObject(numDistinct); - NumDistinctValueEstimator o = - new NumDistinctValueEstimator(v, myagg.numDV.getnumBitVectors()); - myagg.numDV.mergeEstimators(o); + + if (v != null && v.length() != 0) { + if (myagg.numDV == null) { + myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(v); + } else { + myagg.numDV.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(v)); + } + } } } } @@ -713,6 +695,7 @@ public void reset(AggregationBuffer agg) throws HiveException { /* Object Inspector corresponding to the input parameter. 
*/ private transient PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector funcOI; private transient PrimitiveObjectInspector numVectorsOI; private final static int MAX_BIT_VECTORS = 1024; @@ -741,9 +724,6 @@ public void reset(AggregationBuffer agg) throws HiveException { private transient StructField ndvField; private transient StringObjectInspector ndvFieldOI; - private transient StructField numBitVectorsField; - private transient IntObjectInspector numBitVectorsFieldOI; - /* Output of final result of the aggregation */ private transient Object[] result; @@ -755,7 +735,10 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc // initialize input if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { inputOI = (PrimitiveObjectInspector) parameters[0]; - numVectorsOI = (PrimitiveObjectInspector) parameters[1]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } } else { soi = (StructObjectInspector) parameters[0]; @@ -774,9 +757,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc ndvField = soi.getStructFieldRef("bitvector"); ndvFieldOI = (StringObjectInspector) ndvField.getFieldObjectInspector(); - numBitVectorsField = soi.getStructFieldRef("numbitvectors"); - numBitVectorsFieldOI = (IntObjectInspector) - numBitVectorsField.getFieldObjectInspector(); } // initialize output @@ -788,7 +768,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); List fname = new ArrayList(); fname.add("columntype"); @@ -797,7 +776,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc fname.add("count"); fname.add("countnulls"); fname.add("bitvector"); - fname.add("numbitvectors"); partialResult = new Object[7]; partialResult[0] = new Text(); @@ -806,7 +784,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc partialResult[3] = new LongWritable(0); partialResult[4] = new LongWritable(0); partialResult[5] = new Text(); - partialResult[6] = new IntWritable(0); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -847,15 +824,14 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc public long sumLength; /* Sum of lengths of all values seen so far */ public long count; /* Count of all values seen so far */ public long countNulls; /* Count of number of null values seen so far */ - public StringNumDistinctValueEstimator numDV; /* Distinct value estimator */ - public int numBitVectors; + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ public boolean firstItem; @Override public int estimate() { JavaDataModel model = JavaDataModel.get(); return (int) (model.primitive1() * 2 + model.primitive2() * 4 + model.lengthFor(columnType) + - ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) : + ((numDV == null) ? 
lengthFor(model, null) : numDV.lengthFor(model))); } @@ -868,8 +844,9 @@ public AggregationBuffer getNewAggregationBuffer() throws HiveException { return result; } - public void initNDVEstimator(StringStatsAgg aggBuffer, int numBitVectors) { - aggBuffer.numDV = new StringNumDistinctValueEstimator(numBitVectors); + public void initNDVEstimator(StringStatsAgg aggBuffer, String func, int numBitVectors) { + aggBuffer.numDV = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(func, + numBitVectors); aggBuffer.numDV.reset(); } @@ -890,83 +867,59 @@ public void reset(AggregationBuffer agg) throws HiveException { public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { Object p = parameters[0]; StringStatsAgg myagg = (StringStatsAgg) agg; - boolean emptyTable = false; - - if (parameters[1] == null) { - emptyTable = true; - } if (myagg.firstItem) { int numVectors = 0; - if (!emptyTable) { - numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); - } - - if (numVectors > MAX_BIT_VECTORS) { - throw new HiveException("The maximum allowed value for number of bit vectors " + - " is " + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors"); + String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString( + parameters[1], funcOI); + if (parameters.length > 2) { + numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[2], numVectorsOI); + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + " is " + + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors"); + } } - initNDVEstimator(myagg, numVectors); + initNDVEstimator(myagg, func, numVectors); myagg.firstItem = false; - myagg.numBitVectors = numVectors; } - if (!emptyTable) { - - // Update null counter if a null value is seen - if (p == null) { - myagg.countNulls++; - } - else { - try { - - String v = PrimitiveObjectInspectorUtils.getString(p, inputOI); - - // Update max length if new length is greater than the ones seen so far - int len = v.length(); - if (len > myagg.maxLength) { - myagg.maxLength = len; - } - - // Update sum length with the new length - myagg.sumLength += len; - - // Increment count of values seen so far - myagg.count++; - - // Add string value to NumDistinctValue Estimator - myagg.numDV.addToEstimator(v); - - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " - + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() - + " ignoring similar exceptions."); - } - } + // Update null counter if a null value is seen + String v = PrimitiveObjectInspectorUtils.getString(p, inputOI); + if (v == null) { + myagg.countNulls++; + } else { + // Update max length if new length is greater than the ones seen so + // far + int len = v.length(); + if (len > myagg.maxLength) { + myagg.maxLength = len; } + + // Update sum length with the new length + myagg.sumLength += len; + + // Increment count of values seen so far + myagg.count++; + + // Add string value to NumDistinctValue Estimator + myagg.numDV.addToEstimator(v); } } @Override public Object terminatePartial(AggregationBuffer agg) throws HiveException { StringStatsAgg myagg = (StringStatsAgg) agg; - - // Serialize numDistinctValue Estimator - Text t = myagg.numDV.serialize(); - // Serialize the rest of the values in the AggBuffer ((Text) partialResult[0]).set(myagg.columnType); ((LongWritable) 
partialResult[1]).set(myagg.maxLength); ((LongWritable) partialResult[2]).set(myagg.sumLength); ((LongWritable) partialResult[3]).set(myagg.count); ((LongWritable) partialResult[4]).set(myagg.countNulls); - ((Text) partialResult[5]).set(t); - ((IntWritable) partialResult[6]).set(myagg.numBitVectors); - + // Serialize numDistinctValue Estimator + if (myagg.numDV != null) { + ((Text) partialResult[5]).set(myagg.numDV.serialize()); + } return partialResult; } @@ -975,17 +928,6 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { if (partial != null) { StringStatsAgg myagg = (StringStatsAgg) agg; - if (myagg.firstItem) { - Object partialValue = soi.getStructFieldData(partial, numBitVectorsField); - int numVectors = numBitVectorsFieldOI.get(partialValue); - if (numVectors <= 0) { - return; - } - initNDVEstimator(myagg, numVectors); - myagg.firstItem = false; - myagg.numBitVectors = numVectors; - } - // Update maxLength if length is greater than the largest value seen so far Object partialValue = soi.getStructFieldData(partial, maxLengthField); if (myagg.maxLength < maxLengthFieldOI.get(partialValue)) { @@ -1007,8 +949,15 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { // Merge numDistinctValue Estimators partialValue = soi.getStructFieldData(partial, ndvField); String v = ndvFieldOI.getPrimitiveJavaObject(partialValue); - NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors); - myagg.numDV.mergeEstimators(o); + + if (v != null && v.length() != 0) { + if (myagg.numDV == null) { + myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(v); + } else { + myagg.numDV.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(v)); + } + } } } @@ -1016,16 +965,12 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { public Object terminate(AggregationBuffer agg) throws HiveException { StringStatsAgg myagg = (StringStatsAgg) agg; - long numDV = 0; + long numDV = myagg.numDV == null ? 
0 : myagg.numDV.estimateNumDistinctValues(); double avgLength = 0.0; long total = myagg.count + myagg.countNulls; - if (myagg.numBitVectors != 0) { - numDV = myagg.numDV.estimateNumDistinctValues(); - } - if (total != 0) { - avgLength = myagg.sumLength / (1.0 * total); + avgLength = myagg.sumLength / (1.0 * total); } // Serialize the result struct @@ -1034,7 +979,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { ((DoubleWritable) result[2]).set(avgLength); ((LongWritable) result[3]).set(myagg.countNulls); ((LongWritable) result[4]).set(numDV); - if (myagg.numBitVectors != 0) { + if (myagg.numDV != null) { ((Text) result[5]).set(myagg.numDV.serialize()); } return result; @@ -1181,8 +1126,6 @@ public void reset(AggregationBuffer agg) throws HiveException { myagg.countNulls = 0; } - boolean warned = false; - @Override public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { Object p = parameters[0]; @@ -1197,32 +1140,21 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep // Update null counter if a null value is seen if (p == null) { myagg.countNulls++; - } - else { - try { - BytesWritable v = PrimitiveObjectInspectorUtils.getBinary(p, inputOI); - - // Update max length if new length is greater than the ones seen so far - int len = v.getLength(); - if (len > myagg.maxLength) { - myagg.maxLength = len; - } - - // Update sum length with the new length - myagg.sumLength += len; - - // Increment count of values seen so far - myagg.count++; - - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " - + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() - + " ignoring similar exceptions."); - } + } else { + BytesWritable v = PrimitiveObjectInspectorUtils.getBinary(p, inputOI); + + // Update max length if new length is greater than the ones seen so + // far + int len = v.getLength(); + if (len > myagg.maxLength) { + myagg.maxLength = len; } + + // Update sum length with the new length + myagg.sumLength += len; + + // Increment count of values seen so far + myagg.count++; } } } @@ -1425,4 +1357,25 @@ public void reset(AggregationBuffer agg) throws HiveException { ((NumericStatsAgg)agg).reset("Date"); } } + + @InterfaceAudience.LimitedPrivate(value = { "Hive" }) + static int lengthFor(JavaDataModel model, Integer numVector) { + int length = model.object(); + length += model.primitive1() * 2; // two int + length += model.primitive2(); // one double + length += model.lengthForRandom() * 2; // two Random + + if (numVector == null) { + numVector = 16; // HiveConf hive.stats.ndv.error default produces 16 + // vectors + } + + if (numVector > 0) { + length += model.array() * 3; // three array + length += model.primitive1() * numVector * 2; // two int array + length += (model.object() + model.array() + model.primitive1() + model.primitive2()) + * numVector; // bitset array + } + return length; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java deleted file mode 100644 index 1c197a028a..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -public class LongNumDistinctValueEstimator extends NumDistinctValueEstimator { - - public LongNumDistinctValueEstimator(int numBitVectors) { - super(numBitVectors); - } - - public LongNumDistinctValueEstimator(String s, int numVectors) { - super(s, numVectors); - } - - @Override - public void addToEstimator(long v) { - /* Update summary bitVector : - * Generate hash value of the long value and mod it by 2^bitVectorSize-1. - * In this implementation bitVectorSize is 31. - */ - super.addToEstimator(v); - } - - @Override - public void addToEstimatorPCSA(long v) { - super.addToEstimatorPCSA(v); - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java deleted file mode 100644 index 601901c163..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.udf.generic; - -public class StringNumDistinctValueEstimator extends NumDistinctValueEstimator { - - public StringNumDistinctValueEstimator(int numVectors) { - super(numVectors); - } - - public StringNumDistinctValueEstimator(String s, int numVectors) { - super(s, numVectors); - } - - public void addToEstimator(String s) { - int v = s.hashCode(); - super.addToEstimator(v); - } - - public void addToEstimatorPCSA(String s) { - int v = s.hashCode(); - super.addToEstimatorPCSA(v); - } -} diff --git a/ql/src/test/queries/clientpositive/char_udf1.q b/ql/src/test/queries/clientpositive/char_udf1.q index 39aa0e0e17..fa3a261c4b 100644 --- a/ql/src/test/queries/clientpositive/char_udf1.q +++ b/ql/src/test/queries/clientpositive/char_udf1.q @@ -142,8 +142,8 @@ from char_udf_1 limit 1; -- Aggregate Functions select - compute_stats(c2, 16), - compute_stats(c4, 16) + compute_stats(c2, 'fm', 16), + compute_stats(c4, 'fm', 16) from char_udf_1; select diff --git a/ql/src/test/queries/clientpositive/compute_stats_date.q b/ql/src/test/queries/clientpositive/compute_stats_date.q index 09128f6fb9..bf478526ba 100644 --- a/ql/src/test/queries/clientpositive/compute_stats_date.q +++ b/ql/src/test/queries/clientpositive/compute_stats_date.q @@ -13,7 +13,7 @@ load data local inpath '../../data/files/flights_join.txt' overwrite into table select count(*) from tab_date; -- compute statistical summary of data -select compute_stats(fl_date, 16) from tab_date; +select compute_stats(fl_date, 'hll') from tab_date; explain analyze table tab_date compute statistics for columns fl_date; diff --git a/ql/src/test/queries/clientpositive/compute_stats_decimal.q b/ql/src/test/queries/clientpositive/compute_stats_decimal.q index 76e1468ada..2beafaf219 100644 --- a/ql/src/test/queries/clientpositive/compute_stats_decimal.q +++ b/ql/src/test/queries/clientpositive/compute_stats_decimal.q @@ -8,4 +8,4 @@ LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal; select count(*) from tab_decimal; -- compute statistical summary of data -select compute_stats(a, 18) from tab_decimal; +select compute_stats(a, 'fm', 18) from tab_decimal; diff --git a/ql/src/test/queries/clientpositive/compute_stats_double.q b/ql/src/test/queries/clientpositive/compute_stats_double.q index 7a1e0f6295..6bae0643a8 100644 --- a/ql/src/test/queries/clientpositive/compute_stats_double.q +++ b/ql/src/test/queries/clientpositive/compute_stats_double.q @@ -6,4 +6,4 @@ LOAD DATA LOCAL INPATH "../../data/files/double.txt" INTO TABLE tab_double; select count(*) from tab_double; -- compute statistical summary of data -select compute_stats(a, 16) from tab_double; +select compute_stats(a, 'fm', 16) from tab_double; diff --git a/ql/src/test/queries/clientpositive/compute_stats_long.q b/ql/src/test/queries/clientpositive/compute_stats_long.q index 6a2070f780..48f4ebb979 100644 --- a/ql/src/test/queries/clientpositive/compute_stats_long.q +++ b/ql/src/test/queries/clientpositive/compute_stats_long.q @@ -6,4 +6,4 @@ LOAD DATA LOCAL INPATH "../../data/files/int.txt" INTO TABLE tab_int; select count(*) from tab_int; -- compute statistical summary of data -select compute_stats(a, 16) from tab_int; +select compute_stats(a, 'fm', 16) from tab_int; diff --git a/ql/src/test/queries/clientpositive/compute_stats_string.q b/ql/src/test/queries/clientpositive/compute_stats_string.q index 0023e7f6bd..79a531e8ec 100644 --- a/ql/src/test/queries/clientpositive/compute_stats_string.q +++ 
b/ql/src/test/queries/clientpositive/compute_stats_string.q @@ -6,4 +6,4 @@ LOAD DATA LOCAL INPATH "../../data/files/string.txt" INTO TABLE tab_string; select count(*) from tab_string; -- compute statistical summary of data -select compute_stats(a, 16) from tab_string; +select compute_stats(a, 'fm', 16) from tab_string; diff --git a/ql/src/test/queries/clientpositive/hll.q b/ql/src/test/queries/clientpositive/hll.q new file mode 100644 index 0000000000..edfdce8a29 --- /dev/null +++ b/ql/src/test/queries/clientpositive/hll.q @@ -0,0 +1,46 @@ +set hive.mapred.mode=nonstrict; + +create table i(key int); + +insert overwrite table i select key from src; + +explain analyze table i compute statistics for columns; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key double); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key decimal); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key date); + +insert into i values ('2012-08-17'); +insert into i values ('2012-08-17'); +insert into i values ('2013-08-17'); +insert into i values ('2012-03-17'); +insert into i values ('2012-05-17'); + +analyze table i compute statistics for columns; + +desc formatted i key; + diff --git a/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q b/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q index 8bbae3914d..d72fad5c0f 100644 --- a/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q +++ b/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q @@ -7,7 +7,7 @@ set hive.groupby.skewindata=false; set mapred.reduce.tasks=31; -select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -17,7 +17,7 @@ select var_samp(substr(src.value,5)) as d from src)subq; -explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -27,7 +27,7 @@ select var_samp(substr(src.value,5)) as d from src)subq; -select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -39,7 +39,7 @@ select set hive.optimize.reducededuplication=false; -explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -49,7 +49,7 @@ select var_samp(substr(src.value,5)) as d from src)subq; -select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select diff --git a/ql/src/test/queries/clientpositive/varchar_udf1.q b/ql/src/test/queries/clientpositive/varchar_udf1.q index 4d1f884ea7..1039ed9848 100644 --- a/ql/src/test/queries/clientpositive/varchar_udf1.q +++ b/ql/src/test/queries/clientpositive/varchar_udf1.q @@ -139,8 +139,8 
@@ from varchar_udf_1 limit 1; -- Aggregate Functions select - compute_stats(c2, 16), - compute_stats(c4, 16) + compute_stats(c2, 'fm', 16), + compute_stats(c4, 'fm', 16) from varchar_udf_1; select diff --git a/ql/src/test/queries/clientpositive/vector_udf1.q b/ql/src/test/queries/clientpositive/vector_udf1.q index 48d3e1ee4d..c1d43725d2 100644 --- a/ql/src/test/queries/clientpositive/vector_udf1.q +++ b/ql/src/test/queries/clientpositive/vector_udf1.q @@ -351,8 +351,8 @@ select from varchar_udf_1; select - compute_stats(c2, 16), - compute_stats(c4, 16) + compute_stats(c2, 'fm', 16), + compute_stats(c4, 'fm', 16) from varchar_udf_1; explain vectorization detail diff --git a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out index 922822e6d2..c0d4eeefb4 100644 --- a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out @@ -36,7 +36,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_one # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 0 14 1.72 3 from deserializer +key string 0 16 1.72 3 from deserializer PREHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS POSTHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') @@ -88,7 +88,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_two # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 0 14 1.72 3 from deserializer +key string 0 16 1.72 3 from deserializer PREHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS POSTHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') diff --git a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out index 2cc7cbc7b6..96dce1e2c5 100644 --- a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out +++ b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out @@ -125,7 +125,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col2 PREHOOK: type: DESCTABLE @@ -135,7 +135,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col3 PREHOOK: type: DESCTABLE @@ -201,7 +201,7 @@ POSTHOOK: type: DESCTABLE 
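
The distinct_count shifts running through these golden files (8 to 10 and 12 to 10 for testtable0, 18 to 20 for the part2 partitions, 14 to 16 in alter_partition_update_status.q.out) are the expected fallout of this patch: ANALYZE ... COMPUTE STATISTICS now defaults to the HyperLogLog estimator instead of the FM sketch, and the two algorithms give slightly different probabilistic answers on the same data. Below is a minimal standalone sketch of the dense-register HLL update and estimate; it is a simplification for illustration only (fixed p, a stand-in mixer instead of Murmur hashing, no bias correction or sparse representation, and the alpha constant for m = 16 taken from the standard HLL formulation).

public class HllSketchDemo {
  static final int P = 4;                          // 2^4 = 16 registers
  static final byte[] register = new byte[1 << P];

  static void add(long hash) {
    int idx = (int) (hash & ((1 << P) - 1));       // low p bits pick a register
    long w = hash >>> P;                           // remaining bits
    byte lr = (byte) (Long.numberOfTrailingZeros(w) + 1); // trailing-zero run length
    if (lr > register[idx]) {
      register[idx] = lr; // registers only grow, so merging sketches is a pointwise max
    }
  }

  static double estimate() {
    double sum = 0;
    int zeros = 0;
    for (byte b : register) {
      sum += Math.pow(2, -b);
      if (b == 0) {
        zeros++;
      }
    }
    int m = register.length;
    double e = 0.673 * m * m / sum;                // harmonic mean; alpha = 0.673 for m = 16
    if (e <= 2.5 * m && zeros > 0) {
      e = m * Math.log((double) m / zeros);        // linear counting for small cardinalities
    }
    return e;
  }

  public static void main(String[] args) {
    for (long v = 0; v < 1000; v++) {
      add(Long.hashCode(v * 0x9E3779B97F4A7C15L)); // stand-in for a real 64-bit hash
    }
    // Prints an estimate in the neighborhood of 1000; m = 16 gives roughly
    // 26% standard error, which is why the FM and HLL golden numbers differ.
    System.out.println(Math.round(estimate()));
  }
}
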
POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE @@ -211,7 +211,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE @@ -276,7 +276,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE @@ -286,7 +286,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE @@ -361,7 +361,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE @@ -437,7 +437,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col4 PREHOOK: type: DESCTABLE @@ -551,7 +551,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -560,7 +560,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 
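
Stepping back from the golden files: the five ColumnStatsMerger hunks earlier in this patch (date, decimal, double, long, string) repeat one guard-and-merge pattern, differing only in the min/max bookkeeping around it. Here is a hedged sketch of that shared logic, compressed into a single helper; the estimator calls (the factory, canMerge, mergeEstimators, estimateNumDistinctValues) are the API this patch adds, while the helper class and its signature are illustrative.

import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;

public final class NdvMergeSketch {
  // Merge two serialized sketches into one NDV; fall back to max(ndv) when
  // either side carries no bit vector, or when the sketches are incompatible
  // (different algorithm or size): the same two fallback branches as in the
  // merger hunks above.
  static long mergeNdv(String aggBits, long aggNdv, String newBits, long newNdv) {
    if (aggBits == null || aggBits.isEmpty() || newBits == null || newBits.isEmpty()) {
      return Math.max(aggNdv, newNdv);
    }
    NumDistinctValueEstimator oldEst =
        NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(aggBits);
    NumDistinctValueEstimator newEst =
        NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(newBits);
    if (oldEst.canMerge(newEst)) {
      oldEst.mergeEstimators(newEst);
      return oldEst.estimateNumDistinctValues();
    }
    return Math.max(aggNdv, newNdv);
  }
}

Note one deliberate asymmetry in the real mergers: the aggregate's bit vector is rewritten (setBitVectors(oldEst.serialize())) only on the successful-merge branch, so an incompatible partition degrades that one merge to max() without destroying the sketch for later, compatible partitions.
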
@@ -618,7 +618,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -627,7 +627,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -737,7 +737,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -746,7 +746,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -804,7 +804,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -813,7 +813,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -924,7 +924,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -933,7 +933,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -991,7 +991,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from 
deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1000,7 +1000,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1113,7 +1113,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1122,7 +1122,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1180,7 +1180,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1189,7 +1189,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1311,7 +1311,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1378,7 +1378,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1457,7 +1457,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: 
statsdb2@testpart2 @@ -1484,7 +1484,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 @@ -1665,7 +1665,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col2 PREHOOK: type: DESCTABLE @@ -1675,7 +1675,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col3 PREHOOK: type: DESCTABLE @@ -1741,7 +1741,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE @@ -1751,7 +1751,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE @@ -1816,7 +1816,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE @@ -1826,7 +1826,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE @@ -1901,7 +1901,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from 
deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE @@ -1977,7 +1977,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col4 PREHOOK: type: DESCTABLE @@ -2091,7 +2091,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -2100,7 +2100,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -2158,7 +2158,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -2167,7 +2167,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -2277,7 +2277,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2286,7 +2286,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2344,7 +2344,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: 
statsdb1@testpart1 @@ -2353,7 +2353,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2464,7 +2464,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2473,7 +2473,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2531,7 +2531,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2540,7 +2540,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2653,7 +2653,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2662,7 +2662,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2720,7 +2720,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 18 from deserializer +col1 int 27 484 0 20 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2729,7 +2729,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2851,7 +2851,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2918,7 +2918,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2997,7 +2997,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 @@ -3024,7 +3024,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 18 6.8 7 from deserializer +col2 string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out index e26e8cba1c..9cd9a8dbe0 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -48,7 +48,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 0 14 1.72 3 from deserializer +key string 0 16 1.72 3 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS @@ -94,7 +94,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_int # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key double 66.0 406.0 10 14 from deserializer +key double 66.0 406.0 10 15 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out index ed90b6fc92..6a3fbc0cc7 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ 
b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -50,7 +50,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 0 14 1.72 3 from deserializer +key string 0 16 1.72 3 from deserializer PREHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key, value PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part @@ -71,7 +71,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 0 14 1.72 3 from deserializer +key string 0 16 1.72 3 from deserializer PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) value PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_stat_part @@ -80,7 +80,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 14 4.92 7 from deserializer +value string 0 19 4.92 7 from deserializer PREHOOK: query: create table src_stat_string_part(key string, value string) partitioned by (partitionName string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out index 95dd6abaec..6e2975e671 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -121,9 +121,9 @@ STAGE PLANS: Statistics: Num rows: 2098 Data size: 16744 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53))) (type: boolean) - Statistics: Num rows: 300 Data size: 2400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 160 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 300 Data size: 2400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 160 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854a00..fccfabd5d1 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: min(_col1) keys: _col0 (type: string), 
_col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index c1a140b558..736016f538 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -236,10 +236,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,10 +302,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -368,10 +368,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -538,10 +538,10 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -682,10 +682,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 5417 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 5417 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -753,10 +753,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -813,10 +813,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -873,10 +873,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54 Data size: 1358 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 54 Data size: 1746 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 54 Data size: 1358 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 54 Data size: 1746 
Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index f7d73c9ddf..e04c1c6bc5 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -807,14 +807,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -979,14 +979,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1065,14 +1065,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1177,14 +1177,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 
210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out index fe3b9e53ef..e84499995b 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -116,10 +116,10 @@ STAGE PLANS: outputColumnNames: a, b Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 16), compute_stats(b, 16) + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -153,17 +153,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out index e19fb5f504..e3abba5bd0 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out @@ -46,7 +46,7 @@ STAGE PLANS: outputColumnNames: a, b, part Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 16), compute_stats(b, 16) + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') keys: part (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -56,7 +56,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -195,7 +195,7 @@ POSTHOOK: Input: default@partitioned1 col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -a int 1 4 0 5 from deserializer +a int 1 4 0 4 from deserializer PREHOOK: query: alter table partitioned1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@partitioned1 @@ -284,7 +284,7 @@ STAGE PLANS: outputColumnNames: a, b, c, d, part Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16) + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') keys: part (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -294,7 +294,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) @@ -478,7 +478,7 @@ STAGE PLANS: outputColumnNames: a, b, c, d, part Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16) + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') keys: part (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -488,7 +488,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) @@ -629,7 +629,7 @@ POSTHOOK: Input: default@partitioned1 col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -a int 1 6 0 5 from deserializer +a int 1 6 0 4 from deserializer PREHOOK: query: desc formatted partitioned1 partition(part=1) c PREHOOK: type: DESCTABLE PREHOOK: Input: default@partitioned1 @@ -639,4 +639,4 @@ POSTHOOK: Input: default@partitioned1 col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -c int 100 200 0 3 from deserializer +c int 100 200 0 2 from deserializer diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out index 29b3373e10..1b125701d7 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out @@ -59,7 +59,7 
@@ STAGE PLANS: outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: one (type: string), two (type: string), three (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -69,7 +69,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) + value expressions: _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) diff --git a/ql/src/test/results/clientpositive/autoColumnStats_7.q.out b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out index 9d24bc53ab..9e2121e0de 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_7.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out @@ -132,10 +132,10 @@ STAGE PLANS: value expressions: key (type: string), c1 (type: int), c2 (type: string) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') mode: partial1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -149,17 +149,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out index 681d962ed0..cdf2082d53 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out @@ -104,7 +104,7 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -116,7 +116,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: false Filter Operator isSamplingPred: false @@ -161,7 +161,7 @@ STAGE PLANS: outputColumnNames: key, value, hr Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: '2008-12-31' (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -177,7 +177,7 @@ STAGE PLANS: properties: column.name.delimiter , columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns.types string,string,struct,struct escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -511,7 +511,7 @@ STAGE PLANS: Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -524,7 +524,7 @@ STAGE PLANS: properties: column.name.delimiter , columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns.types string,string,struct,struct escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -534,7 +534,7 @@ STAGE PLANS: properties: column.name.delimiter , columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns.types string,string,struct,struct escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out index d26e2c02b7..06f23b1e7c 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out @@ -86,10 +86,10 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -142,10 +142,10 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: 
_col0, _col1 - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -181,17 +181,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -252,7 +252,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest_j1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int 0 498 0 196 from deserializer +key int 0 498 0 309 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: desc formatted dest_j1 value PREHOOK: type: DESCTABLE @@ -262,5 +262,5 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest_j1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 214 6.834630350194552 7 from deserializer +value string 0 309 6.834630350194552 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out index 17a912ec13..57f00674de 100644 --- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -285,24 +285,24 @@ STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:c + $hdt$_1:b Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:c + $hdt$_1:b TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Stage: Stage-9 @@ -322,7 +322,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE @@ -341,24 +341,24 @@ 
STAGE PLANS: Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:b + $hdt$_2:c Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:b + $hdt$_2:c TableScan - alias: b + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Stage: Stage-6 @@ -369,7 +369,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE @@ -411,20 +411,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE @@ -449,19 +449,19 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: string) TableScan - alias: b + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -474,7 +474,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE @@ -505,27 +505,27 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) 
+ predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE @@ -552,19 +552,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -577,7 +577,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out b/ql/src/test/results/clientpositive/avro_decimal.q.out index 5a3b72defe..e1045ebea1 100644 --- a/ql/src/test/results/clientpositive/avro_decimal.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal.q.out @@ -34,7 +34,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value decimal(8,4) -12.25 234.79 0 6 from deserializer +value decimal(8,4) -12.25 234.79 0 10 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/avro_decimal_native.q.out index fe77512191..b73b5f5679 100644 --- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out @@ -38,7 +38,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value decimal(8,4) -12.25 234.79 0 6 from deserializer +value decimal(8,4) -12.25 234.79 0 10 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f034b6..23f5fcfc76 100644 --- 
a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, $f2, $f2_0 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out index b9cf3ceab4..29499a1f54 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out @@ -68,14 +68,14 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: key, c_int, key0, c_int0 - Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int) outputColumnNames: key, c_int, p, q - Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -730,14 +730,14 @@ STAGE PLANS: 2 key (type: string) 3 key (type: string) outputColumnNames: key, c_int, key0, c_int0, key1, c_int2 - Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), 
c_int (type: int), key0 (type: string), c_int0 (type: int), key1 (type: string), c_int2 (type: int) outputColumnNames: key, c_int, p, q, x, b - Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/char_udf1.q.out b/ql/src/test/results/clientpositive/char_udf1.q.out index 07ce108a75..fefc7407e0 100644 --- a/ql/src/test/results/clientpositive/char_udf1.q.out +++ b/ql/src/test/results/clientpositive/char_udf1.q.out @@ -393,15 +393,15 @@ POSTHOOK: Input: default@char_udf_1 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: select - compute_stats(c2, 16), - compute_stats(c4, 16) + compute_stats(c2, 'fm', 16), + compute_stats(c4, 'fm', 16) from char_udf_1 PREHOOK: type: QUERY PREHOOK: Input: default@char_udf_1 #### A masked pattern was here #### POSTHOOK: query: select - compute_stats(c2, 16), - compute_stats(c4, 16) + compute_stats(c2, 'fm', 16), + compute_stats(c4, 'fm', 16) from char_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_udf_1 diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out index 14c5d5b59b..0f2822504f 100644 --- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out +++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out @@ -43,7 +43,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -a bigint 0 0 5 1 from deserializer +a bigint 0 0 5 0 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: describe formatted all_nulls b PREHOOK: type: DESCTABLE @@ -53,7 +53,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -b double 0.0 0.0 5 1 from deserializer +b double 0.0 0.0 5 0 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: drop table all_nulls PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out index 96feeed49c..9925928da7 100644 --- a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out +++ b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out @@ -108,7 +108,7 @@ STAGE PLANS: value expressions: key (type: int), value (type: string) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE diff --git 
a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 07d26e92bb..5ecb20501b 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -52,7 +52,7 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16) + aggregations: compute_stats(employeeid, 'hll') keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -62,7 +62,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 2000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -112,7 +112,7 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16) + aggregations: compute_stats(employeeid, 'hll') keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -124,7 +124,7 @@ STAGE PLANS: Map-reduce partition columns: 2000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -254,7 +254,7 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16) + aggregations: compute_stats(employeeid, 'hll') keys: 4000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -264,7 +264,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 4000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -314,7 +314,7 @@ STAGE PLANS: outputColumnNames: employeeid Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16) + aggregations: compute_stats(employeeid, 'hll') keys: 4000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -326,7 +326,7 @@ STAGE PLANS: Map-reduce partition columns: 4000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -456,7 +456,7 @@ STAGE PLANS: outputColumnNames: employeeid, employeename Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -466,7 +466,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 2000.0 (type: double) Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: 
NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -511,7 +511,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 34 1 14 from deserializer +employeeID int 16 34 1 12 from deserializer PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part @@ -520,7 +520,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeName string 1 9 4.3076923076923075 6 from deserializer +employeeName string 1 12 4.3076923076923075 6 from deserializer PREHOOK: query: explain analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY @@ -543,7 +543,7 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, employeesalary Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') keys: employeesalary (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -553,7 +553,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -600,7 +600,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 34 1 14 from deserializer +employeeID int 16 34 1 12 from deserializer PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part @@ -609,7 +609,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 34 1 14 from deserializer +employeeID int 16 34 1 12 from deserializer PREHOOK: query: explain analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY @@ -632,23 +632,23 @@ STAGE PLANS: outputColumnNames: employeeid, employeename Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 
944 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -681,7 +681,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 34 2 14 from deserializer +employeeID int 16 34 2 12 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE @@ -713,7 +713,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 34 1 14 from deserializer +employeeID int 16 34 1 12 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} PREHOOK: query: analyze table default.Employee_Part compute statistics for columns PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index 468d2e797b..a64c76badf 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -88,7 +88,7 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, country Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeename, 16), compute_stats(employeeid, 16) + aggregations: compute_stats(employeename, 'hll'), compute_stats(employeeid, 'hll') keys: 4000.0 (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -98,7 +98,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string) Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -143,7 +143,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeName string 0 6 5.142857142857143 6 from deserializer +employeeName string 0 7 5.142857142857143 6 from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID PREHOOK: type: QUERY @@ -166,7 +166,7 @@ STAGE PLANS: outputColumnNames: employeeid, country Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE Group 
By Operator - aggregations: compute_stats(employeeid, 16) + aggregations: compute_stats(employeeid, 'hll') keys: 2000.0 (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -176,7 +176,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: 2000.0 (type: double), _col1 (type: string) Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) + value expressions: _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -223,7 +223,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 34 1 14 from deserializer +employeeID int 16 34 1 12 from deserializer PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part @@ -232,7 +232,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 31 0 9 from deserializer +employeeID int 16 31 0 7 from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID PREHOOK: type: QUERY @@ -255,7 +255,7 @@ STAGE PLANS: outputColumnNames: employeeid, employeesalary, country Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16) + aggregations: compute_stats(employeeid, 'hll') keys: employeesalary (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -265,7 +265,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) + value expressions: _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -320,7 +320,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeID int 16 34 1 14 from deserializer +employeeID int 16 34 1 12 from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns PREHOOK: type: QUERY @@ -343,7 +343,7 @@ STAGE PLANS: outputColumnNames: employeeid, employeename, employeesalary, country Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') keys: employeesalary (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -353,7 +353,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) 
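A note on the distinct_count churn running through these expected outputs (14 → 12, 9 → 7, 11 → 9 over unchanged data): it is the estimator behind compute_stats switching from the FM sketch to HyperLogLog, as the new 'hll' argument shows, and the two algorithms simply land on different estimates — HLL is generally the closer of the two at these cardinalities. For reference, the raw HyperLogLog estimate over m = 2^p registers is alpha_m * m^2 / sum_i 2^(-register[i]). The toy below is a self-contained illustration of that formula, not Hive's implementation; the register indexing and the splitmix64-style mixer are assumptions made so the example runs on its own, and it omits the small/large-range corrections a production estimator would apply.

    // ToyHll.java -- minimal HyperLogLog for illustration only (not Hive's class).
    public final class ToyHll {
      private final int p;       // index bits; m = 2^p registers
      private final byte[] reg;  // each register keeps the max rank observed

      public ToyHll(int p) {
        this.p = p;
        this.reg = new byte[1 << p];
      }

      // splitmix64 finalizer: stands in for whatever 64-bit hash a real system uses
      private static long mix(long z) {
        z += 0x9e3779b97f4a7c15L;
        z = (z ^ (z >>> 30)) * 0xbf58476d1ce4e5b9L;
        z = (z ^ (z >>> 27)) * 0x94d049bb133111ebL;
        return z ^ (z >>> 31);
      }

      public void add(long value) {
        long h = mix(value);
        int idx = (int) (h >>> (64 - p));                 // high p bits pick a register
        byte rank = (byte) (Long.numberOfLeadingZeros(h << p) + 1); // rank of the rest
        if (rank > reg[idx]) {
          reg[idx] = rank;
        }
      }

      public double estimate() {
        int m = reg.length;
        double sum = 0.0;
        for (byte r : reg) {
          sum += Math.pow(2.0, -r);                       // harmonic-mean denominator
        }
        double alpha = 0.7213 / (1.0 + 1.079 / m);        // standard bias constant (m >= 128)
        return alpha * m * m / sum;                       // raw estimate, no range corrections
      }

      public static void main(String[] args) {
        ToyHll hll = new ToyHll(10);                      // 1024 registers, ~3% typical error
        for (int i = 0; i < 50_000; i++) {
          hll.add(i % 20_000);                            // 20,000 distinct values, repeated
        }
        System.out.printf("estimate ~ %.0f (true 20000)%n", hll.estimate());
      }
    }

Errors of a few percent in either direction are exactly why the same column can read 14 under one sketch and 12 under another; neither golden value is "wrong", they are different estimators.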
@@ -408,7 +408,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeName string 0 6 5.142857142857143 6 from deserializer +employeeName string 0 12 5.142857142857143 6 from deserializer PREHOOK: query: drop table Employee PREHOOK: type: DROPTABLE POSTHOOK: query: drop table Employee @@ -483,7 +483,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeName string 0 6 5.142857142857143 6 from deserializer +employeeName string 0 12 5.142857142857143 6 from deserializer PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -530,7 +530,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -employeeName string 0 6 5.142857142857143 6 from deserializer +employeeName string 0 12 5.142857142857143 6 from deserializer PREHOOK: query: alter table Employee add columns (c int ,d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@employee @@ -575,7 +575,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -c int 2000 4000 0 4 from deserializer +c int 2000 4000 0 3 from deserializer PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out index 52e35385a1..7e080fec9b 100644 --- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out +++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out @@ -30,23 +30,23 @@ STAGE PLANS: outputColumnNames: user id, user name Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(user id, 16), compute_stats(user name, 16) + aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -87,14 +87,14 @@ STAGE PLANS: outputColumnNames: user id Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(user id, 16) + aggregations: compute_stats(user id, 'hll') mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 462d4c1771..91c8f150a2 100644 --- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -60,23 +60,23 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -112,16 +112,16 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null 
sort order: sort order: - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -180,13 +180,13 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -242,23 +242,23 @@ STAGE PLANS: outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(desturl, 16), compute_stats(visitdate, 16), compute_stats(adrevenue, 16), compute_stats(useragent, 16), compute_stats(ccode, 16), compute_stats(lcode, 16), compute_stats(skeyword, 16), compute_stats(avgtimeonsite, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4396 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4396 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 
4404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -287,7 +287,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -destURL string 0 56 48.945454545454545 96 from deserializer +destURL string 0 55 48.945454545454545 96 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE @@ -297,7 +297,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -adRevenue float 13.099044799804688 492.98870849609375 0 58 from deserializer +adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE @@ -307,7 +307,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -avgTimeOnSite int 1 9 0 11 from deserializer +avgTimeOnSite int 1 9 0 9 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE empty_tab( a int, @@ -351,23 +351,23 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16), compute_stats(e, 16) + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial 
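The Data size deltas on these stats rows are mechanical, not data-driven: in the hash-side aggregations the estimate drops by 12 bytes per stats column — 968 → 944 is 24 over two columns, 1444 → 1408 is 36 over three, and 4396 → 4288 is 108 over nine. That is consistent with the ndv bit vector field inside the value-expression structs moving from a string to a binary representation (the struct type parameters were stripped in this rendering of the diff, so that reading is inferred, presumably backed by Hive's JavaDataModel size estimates); the 12-bytes-per-column pattern, though, is visible directly in the numbers above.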
outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -416,7 +416,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -destURL string 0 56 48.945454545454545 96 from deserializer +destURL string 0 55 48.945454545454545 96 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( sourceIP string, @@ -482,23 +482,23 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -534,16 +534,16 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -602,13 +602,13 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -664,23 +664,23 @@ STAGE PLANS: outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(desturl, 16), compute_stats(visitdate, 16), compute_stats(adrevenue, 16), compute_stats(useragent, 16), compute_stats(ccode, 16), compute_stats(lcode, 16), compute_stats(skeyword, 16), compute_stats(avgtimeonsite, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4396 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4396 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: 
Num rows: 1 Data size: 4404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -709,7 +709,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -destURL string 0 56 48.945454545454545 96 from deserializer +destURL string 0 55 48.945454545454545 96 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue PREHOOK: type: DESCTABLE @@ -719,7 +719,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -adRevenue float 13.099044799804688 492.98870849609375 0 58 from deserializer +adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite PREHOOK: type: DESCTABLE @@ -729,7 +729,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -avgTimeOnSite int 1 9 0 11 from deserializer +avgTimeOnSite int 1 9 0 9 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out index c2472377a8..5cd2180108 100644 --- a/ql/src/test/results/clientpositive/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -35,15 +35,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_date #### A masked pattern was here #### 20 -PREHOOK: query: select compute_stats(fl_date, 16) from tab_date +PREHOOK: query: select compute_stats(fl_date, 'hll') from tab_date PREHOOK: type: QUERY PREHOOK: Input: default@tab_date #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(fl_date, 16) from tab_date +POSTHOOK: query: select compute_stats(fl_date, 'hll') from tab_date POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_date #### A masked pattern was here #### -{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":18,"ndvbitvector":"{0, 1, 2, 3, 4, 5}{0, 1, 2, 3}{0}{0, 1, 2, 6}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 2}{0, 1, 2, 3, 4}{0, 1, 2, 4, 5}{0, 1, 2, 3}{0, 1, 2, 3, 5}{0, 1, 2, 3, 4, 5}{0, 1, 2, 3, 4}"} 
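The removed line above prints the FM sketch state as integer bit sets, one {…} group per bit vector; the replacement that follows carries the estimator state as base64 text instead, because the new sketch serializes to binary. The embedded \r\n breaks inside the new value are what a MIME-style base64 codec produces (76-character lines separated by CRLF). The round trip below shows the shape using the JDK codec; whether Hive encodes with java.util.Base64 or another library is not visible in this diff, so treat that choice as an assumption.

    import java.util.Arrays;
    import java.util.Base64;

    // Illustration only: binary estimator state rendered as MIME base64,
    // matching the line-wrapped look of the new golden value.
    public final class NdvSerDemo {
      public static void main(String[] args) {
        byte[] registers = new byte[128];
        for (int i = 0; i < registers.length; i++) {
          registers[i] = (byte) (i % 7);                    // toy register contents
        }
        String text = Base64.getMimeEncoder().encodeToString(registers);
        System.out.println(text);                           // 76-char lines joined by \r\n
        byte[] back = Base64.getMimeDecoder().decode(text);
        System.out.println(Arrays.equals(registers, back)); // true
      }
    }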
+{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":19,"ndvbitvector":"SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy\r\n/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw=\r\n"} PREHOOK: query: explain analyze table tab_date compute statistics for columns fl_date PREHOOK: type: QUERY @@ -66,14 +66,14 @@ STAGE PLANS: outputColumnNames: fl_date Statistics: Num rows: 13 Data size: 778 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(fl_date, 16) + aggregations: compute_stats(fl_date, 'hll') mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 572 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 572 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -111,7 +111,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tab_date # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -fl_date date 2000-11-20 2010-10-29 0 18 from deserializer +fl_date date 2000-11-20 2010-10-29 0 19 from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} PREHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS diff --git a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out index e0584c50a8..fcfce78b82 100644 --- a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out @@ -23,11 +23,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_decimal #### A masked pattern was here #### 19 -PREHOOK: query: select compute_stats(a, 18) from tab_decimal +PREHOOK: query: select compute_stats(a, 'fm', 18) from tab_decimal PREHOOK: type: QUERY PREHOOK: Input: default@tab_decimal #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 18) from tab_decimal +POSTHOOK: query: select compute_stats(a, 'fm', 18) from tab_decimal POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_decimal #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/compute_stats_double.q.out b/ql/src/test/results/clientpositive/compute_stats_double.q.out index 5b921735f0..e6a087dd98 100644 --- a/ql/src/test/results/clientpositive/compute_stats_double.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_double.q.out @@ -23,11 +23,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_double #### A masked pattern was here #### 16 -PREHOOK: query: select compute_stats(a, 16) from tab_double +PREHOOK: query: select compute_stats(a, 'fm', 16) from tab_double PREHOOK: type: QUERY PREHOOK: Input: default@tab_double #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_double +POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_double POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_double #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/compute_stats_long.q.out 
b/ql/src/test/results/clientpositive/compute_stats_long.q.out index 119d1731cc..fb985d8266 100644 --- a/ql/src/test/results/clientpositive/compute_stats_long.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_long.q.out @@ -23,11 +23,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### 12 -PREHOOK: query: select compute_stats(a, 16) from tab_int +PREHOOK: query: select compute_stats(a, 'fm', 16) from tab_int PREHOOK: type: QUERY PREHOOK: Input: default@tab_int #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_int +POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_int POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/compute_stats_string.q.out b/ql/src/test/results/clientpositive/compute_stats_string.q.out index 8c40490bc0..a5d66eba31 100644 --- a/ql/src/test/results/clientpositive/compute_stats_string.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_string.q.out @@ -23,11 +23,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_string #### A masked pattern was here #### 10 -PREHOOK: query: select compute_stats(a, 16) from tab_string +PREHOOK: query: select compute_stats(a, 'fm', 16) from tab_string PREHOOK: type: QUERY PREHOOK: Input: default@tab_string #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_string +POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_string POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_string #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index faa14ba9c5..5593e422b6 100644 --- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -16,7 +16,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 0 205 2.812 3 from deserializer +key string 0 309 2.812 3 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended src1 PREHOOK: type: DESCTABLE @@ -36,7 +36,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 14 4.92 7 from deserializer +value string 0 19 4.92 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended src_json PREHOOK: type: DESCTABLE @@ -75,7 +75,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_sequencefile # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 214 6.812 7 from deserializer +value string 0 309 6.812 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcbucket PREHOOK: type: DESCTABLE @@ -95,7 +95,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 234 6.802 7 from deserializer +value string 
0 430 6.802 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcbucket2 PREHOOK: type: DESCTABLE @@ -115,7 +115,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 214 6.812 7 from deserializer +value string 0 309 6.812 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcpart PREHOOK: type: DESCTABLE @@ -143,7 +143,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 0 205 2.812 3 from deserializer +key string 0 309 2.812 3 from deserializer PREHOOK: query: describe extended alltypesorc PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc @@ -172,7 +172,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -ctinyint tinyint -64 62 3115 94 from deserializer +ctinyint tinyint -64 62 3115 127 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc cfloat PREHOOK: type: DESCTABLE @@ -182,7 +182,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -cfloat float -64.0 79.5530014038086 3115 117 from deserializer +cfloat float -64.0 79.5530014038086 3115 131 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc ctimestamp1 PREHOOK: type: DESCTABLE @@ -192,7 +192,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -ctimestamp1 timestamp -30 31 3115 31 from deserializer +ctimestamp1 timestamp -30 31 3115 35 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc cboolean2 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/constant_prop_2.q.out b/ql/src/test/results/clientpositive/constant_prop_2.q.out index 24be5188e2..93050417c6 100644 --- a/ql/src/test/results/clientpositive/constant_prop_2.q.out +++ b/ql/src/test/results/clientpositive/constant_prop_2.q.out @@ -43,7 +43,7 @@ STAGE PLANS: outputColumnNames: key, value 
diff --git a/ql/src/test/results/clientpositive/constant_prop_2.q.out b/ql/src/test/results/clientpositive/constant_prop_2.q.out
index 24be5188e2..93050417c6 100644
--- a/ql/src/test/results/clientpositive/constant_prop_2.q.out
+++ b/ql/src/test/results/clientpositive/constant_prop_2.q.out
@@ -43,7 +43,7 @@ STAGE PLANS:
outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: '2008-04-08' (type: string), '11' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -53,7 +53,7 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: '2008-04-08' (type: string), '11' (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct), _col3 (type: struct)
+ value expressions: _col2 (type: struct), _col3 (type: struct)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
diff --git a/ql/src/test/results/clientpositive/correlated_join_keys.q.out b/ql/src/test/results/clientpositive/correlated_join_keys.q.out
index ec5d008728..b81a5611b1 100644
--- a/ql/src/test/results/clientpositive/correlated_join_keys.q.out
+++ b/ql/src/test/results/clientpositive/correlated_join_keys.q.out
@@ -207,7 +207,7 @@ STAGE PLANS:
keys:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
diff --git a/ql/src/test/results/clientpositive/cross_join_merge.q.out b/ql/src/test/results/clientpositive/cross_join_merge.q.out
index f4956ded22..7241dfe45f 100644
--- a/ql/src/test/results/clientpositive/cross_join_merge.q.out
+++ b/ql/src/test/results/clientpositive/cross_join_merge.q.out
@@ -233,7 +233,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain
select src1.key
from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key
PREHOOK: type: QUERY
@@ -250,7 +250,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: src1
+ alias: src3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -260,34 +260,29 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ sort order:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
TableScan
- alias: src3
+ alias: src2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ predicate: (5.0 = UDFToDouble(key)) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
+ 0
+ 1
outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125000 Data size: 2781000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -300,36 +295,45 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125000 Data size: 2781000 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: src2
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (5.0 = UDFToDouble(key)) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- sort order:
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0
- 1
- outputColumnNames: _col0
- Statistics: Num rows: 137500 Data size: 3059050 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 137500 Data size: 3059050 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2
+ Statistics: Num rows: 137500 Data size: 3059100 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 137500 Data size: 3059100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 137500 Data size: 3059100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
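[Editor's note] The row-count shifts in these join plans (16 → 20 in correlated_join_keys, and the reordered cross_join_merge plan above) follow directly from the new NDV figures: join cardinality estimates divide the product of the input sizes by the larger distinct-value count of the join keys, so a different NDV moves both the estimate and, sometimes, the chosen join order. For orientation only, the textbook Selinger-style form of that estimate is below; Hive's actual cost model has further refinements, and this is not a claim about its exact code path.

% Classic equi-join cardinality estimate, where V(R,a) is the NDV of column a in R:
\[
  |R \bowtie_{R.a = S.b} S| \;\approx\; \frac{|R| \cdot |S|}{\max\bigl(V(R,a),\, V(S,b)\bigr)}
\]

A larger (more accurate) NDV in the denominator shrinks the estimate, which is also why the auto_join goldens later in this patch drop from 987 to 434 estimated rows.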
diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
index 5d86866e2a..f58a7cc8e1 100644
--- a/ql/src/test/results/clientpositive/decimal_stats.q.out
+++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
@@ -48,7 +48,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@decimal_1
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-v decimal(10,0) 500 1 from deserializer
+v decimal(10,0) 500 0 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\",\"u\":\"true\",\"v\":\"true\"}}
PREHOOK: query: explain select * from decimal_1 order by t limit 100
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/describe_table.q.out b/ql/src/test/results/clientpositive/describe_table.q.out
index 7869494252..3ba9a7b942 100644
--- a/ql/src/test/results/clientpositive/describe_table.q.out
+++ b/ql/src/test/results/clientpositive/describe_table.q.out
@@ -212,7 +212,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key string 0 205 2.812 3 from deserializer
+key string 0 309 2.812 3 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: describe formatted srcpart PARTITION(ds='2008-04-08', hr='12')
PREHOOK: type: DESCTABLE
@@ -304,7 +304,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@srcpart
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key string 0 205 2.812 3 from deserializer
+key string 0 309 2.812 3 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}}
PREHOOK: query: describe formatted `srcpart` PARTITION(ds='2008-04-08', hr='12')
PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
index a4b18d7cec..73d4cd7660 100644
--- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -76,23 +76,23 @@ STAGE PLANS:
outputColumnNames: sourceip, adrevenue, avgtimeonsite
Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16)
+ aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll')
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -128,16 +128,16 @@ STAGE PLANS:
outputColumnNames: sourceip, adrevenue, avgtimeonsite
Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16)
+ aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll')
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -196,13 +196,13 @@ STAGE PLANS:
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -244,7 +244,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-sourceIP string 0 69 12.763636363636364 13 from deserializer
+sourceIP string 0 55 12.763636363636364 13 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
PREHOOK: type: DESCTABLE
@@ -254,7 +254,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-avgTimeOnSite int 1 9 0 11 from deserializer
+avgTimeOnSite int 1 9 0 9 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue
PREHOOK: type: DESCTABLE
@@ -264,7 +264,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-adRevenue float 13.099044799804688 492.98870849609375 0 58 from deserializer
+adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: CREATE TABLE empty_tab(
a int,
@@ -318,23 +318,23 @@ STAGE PLANS:
outputColumnNames: a, b, c, d, e
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16), compute_stats(e, 16)
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll')
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -471,7 +471,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-sourceIP string 0 69 12.763636363636364 13 from deserializer
+sourceIP string 0 55 12.763636363636364 13 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword
PREHOOK: type: QUERY
@@ -497,7 +497,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-sKeyword string 0 49 7.872727272727273 19 from deserializer
+sKeyword string 0 54 7.872727272727273 19 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword
PREHOOK: type: DESCTABLE
@@ -507,5 +507,5 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-sKeyword string 0 49 7.872727272727273 19 from deserializer
+sKeyword string 0 54 7.872727272727273 19 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
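[Editor's note] Where the goldens move by only a few units (sourceIP 69 → 55, avgTimeOnSite 11 → 9, sKeyword 49 → 54), it helps to distinguish estimator error from ground truth. A throwaway JDK-only checker such as the one below, run against the raw test data file, gives the exact NDV to compare a golden distinct_count against. The file path and the field delimiter here are assumptions for illustration, not part of the patch.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;

// Hypothetical helper (not Hive code): exact NDV of one delimited column in a
// local data file, e.g. the UserVisits fixture backing uservisits_web_text_none.
public class ExactNdv {
  public static void main(String[] args) throws IOException {
    String file = args[0];                 // e.g. data/files/UserVisits.dat (assumed path)
    int column = Integer.parseInt(args[1]); // 0-based column index
    String delimiter = "\\|";              // assumed; adjust to the table's field terminator
    Set<String> distinct = new HashSet<>();
    for (String line : Files.readAllLines(Paths.get(file))) {
      String[] fields = line.split(delimiter, -1);
      if (column < fields.length) {
        distinct.add(fields[column]);
      }
    }
    System.out.println("exact NDV = " + distinct.size());
  }
}

Note also the decimal_stats change above: a column that is entirely NULL now reports distinct_count 0 rather than 1, which matches the convention that NULLs do not contribute distinct values.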
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out
index 06f8408327..1096e9fc64 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out
@@ -61,7 +61,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key int 0 498 0 196 from deserializer
+key int 0 498 0 309 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: DESCRIBE FORMATTED encrypted_table value
PREHOOK: type: DESCTABLE
@@ -71,7 +71,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string 0 214 6.812 7 from deserializer
+value string 0 309 6.812 7 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO encrypted_db.encrypted_table_2
PREHOOK: type: ALTERTABLE_RENAME
@@ -94,7 +94,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key int 0 498 0 196 from deserializer
+key int 0 498 0 309 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: DESCRIBE FORMATTED encrypted_table value
PREHOOK: type: DESCTABLE
@@ -104,7 +104,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@encrypted_table
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string 0 214 6.812 7 from deserializer
+value string 0 309 6.812 7 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO default.plain_table
PREHOOK: type: ALTERTABLE_RENAME
diff --git a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out
index f256ec11bf..f6c4237ca7 100644
--- a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out
@@ -18,23 +18,23 @@ STAGE PLANS:
outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct), _col1 (type: struct)
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/hll.q.out b/ql/src/test/results/clientpositive/hll.q.out
new file mode 100644
index 0000000000..b9357c3043
--- /dev/null
+++ b/ql/src/test/results/clientpositive/hll.q.out
@@ -0,0 +1,239 @@
+PREHOOK: query: create table i(key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@i
+POSTHOOK: query: create table i(key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@i
+PREHOOK: query: insert overwrite table i select key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@i
+POSTHOOK: query: insert overwrite table i select key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@i
+POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: explain analyze table i compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table i compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: i
+ Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.i
+
+PREHOOK: query: analyze table i compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@i
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table i compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@i
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted i key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@i
+POSTHOOK: query: desc formatted i key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@i
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key int 0 498 0 309 from deserializer
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key double 0.0 498.0 0 309 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key decimal) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key decimal(10,0) 0 498 0 309 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create 
table i(key date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2013-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2013-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-03-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-03-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-05-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-05-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key date 2012-03-17 2013-08-17 0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 1aea388815..f09f01d971 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -359,8 +359,8 @@ InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') diff --git a/ql/src/test/results/clientpositive/join32.q.out 
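[Editor's note] hll.q.out is the new directed test: the same 500-row src keyset lands on exactly 309 for int, double, and decimal, and the date column reports exactly 4 for the four distinct dates among the five inserted rows. Exactness at tiny cardinalities is expected because HLL implementations commonly fall back to linear counting while most registers are still empty. The sketch below demonstrates that estimator for intuition only; it is not Hive's code, and the slot count is an assumed parameter.

import java.util.BitSet;
import java.util.Random;

// A minimal linear-counting demo (the standard small-cardinality estimator that
// HLL implementations typically use): with m slots and z still-zero slots, the
// estimate is m * ln(m / z), which is near-exact when n << m.
public class LinearCountingDemo {
  public static void main(String[] args) {
    int m = 1 << 14;                     // assumed slot count
    BitSet bits = new BitSet(m);
    Random values = new Random(42);
    int n = 309;                         // true cardinality, matching src keys
    for (int i = 0; i < n; i++) {
      int hash = values.nextInt();       // stand-in for a real hash function
      bits.set(Math.floorMod(hash, m));
    }
    int zeroes = m - bits.cardinality();
    double estimate = m * Math.log((double) m / zeroes);
    System.out.printf("true=%d estimate=%.1f%n", n, estimate); // ~309
  }
}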
diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out
index a191284aca..176989caff 100644
--- a/ql/src/test/results/clientpositive/join32.q.out
+++ b/ql/src/test/results/clientpositive/join32.q.out
@@ -28,25 +28,73 @@ STAGE PLANS:
Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:y
+ $hdt$_0:z
Fetch Operator
limit: -1
+ Partition Description:
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
$hdt$_2:x
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:y
+ $hdt$_0:z
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
@@ -69,31 +117,31 @@ STAGE PLANS:
HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
Position of Big Table: 0
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
@@ -102,11 +150,11 @@ STAGE PLANS:
keys:
0 _col3 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col3, _col6
+ outputColumnNames: _col1, _col2, _col4
Position of Big Table: 0
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string)
+ expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -287,7 +335,7 @@ STAGE PLANS:
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z]
+ /src [$hdt$_1:y]
Stage: Stage-0
Move Operator
diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out
index a191284aca..176989caff 100644
--- a/ql/src/test/results/clientpositive/join33.q.out
+++ b/ql/src/test/results/clientpositive/join33.q.out
@@ -28,25 +28,73 @@ STAGE PLANS:
Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:y
+ $hdt$_0:z
Fetch Operator
limit: -1
+ Partition Description:
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
$hdt$_2:x
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:y
+ $hdt$_0:z
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
@@ -69,31 +117,31 @@ STAGE PLANS:
HashTable Sink Operator
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
+ 1 _col0 (type: string)
Position of Big Table: 0
Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
Position of Big Table: 0
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
@@ -102,11 +150,11 @@ STAGE PLANS:
keys:
0 _col3 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col3, _col6
+ outputColumnNames: _col1, _col2, _col4
Position of Big Table: 0
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string)
+ expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -287,7 +335,7 @@ STAGE PLANS:
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z]
+ /src [$hdt$_1:y]
Stage: Stage-0
Move Operator
diff --git a/ql/src/test/results/clientpositive/join_parse.q.out b/ql/src/test/results/clientpositive/join_parse.q.out
index 17733acdc3..d7a33a9b13 100644
--- a/ql/src/test/results/clientpositive/join_parse.q.out
+++ b/ql/src/test/results/clientpositive/join_parse.q.out
@@ -28,19 +28,19 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col1 (type: string)
TableScan
- alias: src1
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
+ expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -53,9 +53,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -69,19 +69,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: string)
+ value expressions: _col0 (type: string)
TableScan
- alias: src
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
+ expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -94,12 +94,12 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
+ expressions: _col0 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -146,19 +146,19 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col1 (type: string)
TableScan
- alias: src1
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
+ expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -171,9 +171,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -187,19 +187,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: string)
+ value expressions: _col0 (type: string)
TableScan
- alias: src
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
+ expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -212,12 +212,12 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
+ expressions: _col0 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -264,19 +264,19 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col1 (type: string)
TableScan
- alias: src1
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
+ expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -289,9 +289,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -305,19 +305,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: string)
+ value expressions: _col0 (type: string)
TableScan
- alias: src
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
+ expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -330,12 +330,12 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
+ expressions: _col0 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
index ec209b2ef1..f29f7b5d1a 100644
--- a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
@@ -125,7 +125,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@a
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key string 0 205 2.812 3 from deserializer
+key string 0 309 2.812 3 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe formatted b key
PREHOOK: type: DESCTABLE
@@ -135,7 +135,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key string 0 205 2.812 3 from deserializer
+key string 0 309 2.812 3 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: from src
insert overwrite table a select *
@@ -233,7 +233,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key string 0 205 2.812 3 from deserializer
+key string 0 309 2.812 3 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe formatted b value
PREHOOK: type: DESCTABLE
@@ -243,7 +243,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string 0 214 6.812 7 from deserializer
+value string 0 309 6.812 7 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: insert into table b select NULL, NULL from src limit 10
PREHOOK: type: QUERY
@@ -263,7 +263,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key string 10 205 2.812 3 from deserializer
+key string 10 309 2.812 3 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: describe formatted b value
PREHOOK: type: DESCTABLE
@@ -273,7 +273,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@b
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string 10 214 6.812 7 from deserializer
+value string 10 309 6.812 7 from deserializer
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table b(value) select key+100000 from src limit 10 PREHOOK: type: QUERY @@ -293,7 +293,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key string 20 205 2.812 3 from deserializer +key string 20 309 2.812 3 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE @@ -303,7 +303,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 10 214 8.0 8 from deserializer +value string 10 309 8.0 8 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: drop table src_multi2 PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/llap/auto_join1.q.out b/ql/src/test/results/clientpositive/llap/auto_join1.q.out index 6a0a1d5d09..5329f84f34 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join1.q.out @@ -68,14 +68,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 0 Map 1 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_join21.q.out b/ql/src/test/results/clientpositive/llap/auto_join21.q.out index 25cd67e7f5..4956af4035 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join21.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join21.q.out @@ -75,25 +75,25 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_join29.q.out b/ql/src/test/results/clientpositive/llap/auto_join29.q.out index f693ce4512..7f97c0e446 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join29.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join29.q.out @@ -75,25 +75,25 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -693,25 +693,25 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 322 Data size: 171948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 322 Data size: 171948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 322 Data size: 171948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 322 Data size: 171948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 322 Data size: 171948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1311,25 +1311,25 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 974 Data size: 520116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 432 Data size: 230688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 974 Data size: 520116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 432 Data size: 230688 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 974 Data size: 520116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 432 Data size: 230688 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 974 Data size: 520116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 432 Data size: 230688 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 974 Data size: 520116 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 432 Data size: 230688 Basic 
stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1938,25 +1938,25 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 987 Data size: 527058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 434 Data size: 231756 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2567,25 +2567,25 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 
(type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2685,25 +2685,25 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3263,15 +3263,15 @@ STAGE PLANS: input vertices: 1 Map 3 2 Map 4 - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 @@ -3312,10 +3312,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 320 Data size: 170880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3394,15 +3394,15 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 960 Data size: 512640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 431 Data size: 230154 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 960 Data size: 512640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 431 Data size: 230154 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 960 Data size: 512640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 431 Data size: 230154 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -3427,10 +3427,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 960 Data size: 512640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 431 Data size: 230154 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 960 Data size: 512640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 431 Data size: 230154 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3511,15 +3511,15 @@ STAGE PLANS: input vertices: 1 Map 3 2 Map 4 - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 @@ -3560,10 +3560,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 314 Data size: 167676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 76362 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_join30.q.out b/ql/src/test/results/clientpositive/llap/auto_join30.q.out index 91a80127a9..e5f5fda73a 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join30.q.out @@ -67,7 +67,7 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -167,7 +167,7 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -300,7 +300,7 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -431,7 +431,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1309 Data size: 233002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -605,7 +605,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1309 Data size: 233002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -758,7 +758,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1309 Data size: 233002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -911,7 +911,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1309 Data size: 233002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -1064,7 +1064,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1309 Data size: 233002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash diff --git a/ql/src/test/results/clientpositive/llap/cluster.q.out b/ql/src/test/results/clientpositive/llap/cluster.q.out index 2fa976b6d5..33c3319415 100644 --- a/ql/src/test/results/clientpositive/llap/cluster.q.out +++ b/ql/src/test/results/clientpositive/llap/cluster.q.out @@ -602,16 +602,16 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: string) Reducer 3 Execution mode: llap @@ -619,10 +619,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -704,16 +704,16 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: 
string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Reducer 3 Execution mode: llap @@ -721,10 +721,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -806,16 +806,16 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 3 Execution mode: llap @@ -823,10 +823,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -907,16 +907,16 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string) Reducer 3 Execution mode: llap @@ -924,10 +924,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out index fb04ee8cf9..c7726fec30 100644 --- a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out @@ -81,16 +81,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -151,13 +151,13 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -340,19 +340,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data 
size: 2656 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -757,19 +757,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -1174,19 +1174,19 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 29 Data size: 11148 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), '11' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), '11' (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out index d55cf30331..6dff50f9f1 100644 --- a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out @@ -82,16 +82,16 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: 
hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct) auto parallelism: false Execution mode: llap LLAP IO: all inputs @@ -154,13 +154,13 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -337,19 +337,19 @@ STAGE PLANS: outputColumnNames: key, value, ds, hr Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: all inputs @@ -742,19 +742,19 @@ STAGE PLANS: outputColumnNames: key, value, ds Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), '11' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), '11' (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto 
parallelism: true Execution mode: llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index dc50fb7fc1..5e647433f1 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -82,7 +82,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int 27 484 0 18 from deserializer +key int 27 484 0 20 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -91,7 +91,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 18 6.8 7 from deserializer +value string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -136,7 +136,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int 27 484 0 18 from deserializer +key int 27 484 0 20 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -145,7 +145,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 18 6.8 7 from deserializer +value string 0 20 6.8 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -194,7 +194,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int 27 495 0 28 from deserializer +key int 27 495 0 30 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -203,7 +203,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 18 6.833333333333333 7 from deserializer +value string 0 30 6.833333333333333 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -278,7 +278,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int 15 495 0 43 from deserializer +key int 15 495 0 40 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -287,7 +287,7 @@ POSTHOOK: type: 
DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 34 6.825 7 from deserializer +value string 0 40 6.825 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -296,7 +296,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int 15 495 0 51 from deserializer +key int 15 495 0 58 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats @@ -305,7 +305,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 53 6.883333333333334 7 from deserializer +value string 0 58 6.883333333333334 7 from deserializer PREHOOK: query: drop table partcolstats PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstats @@ -358,7 +358,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatsnum # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 18 6.833333333333333 7 from deserializer +value string 0 30 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatsnum PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatsnum @@ -411,7 +411,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatsdec # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 18 6.833333333333333 7 from deserializer +value string 0 30 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatsdec PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatsdec @@ -464,7 +464,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatschar # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 18 6.833333333333333 7 from deserializer +value string 0 30 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatschar PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatschar diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out index fab5c9c029..1e4e08453b 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out @@ -72,18 +72,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -93,11 +93,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -220,18 +220,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -241,11 +241,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -360,18 +360,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 0 Map 1 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -383,11 +383,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -497,12 +497,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -515,18 +515,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -536,11 +536,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -650,12 +650,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -668,18 +668,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data 
size: 1720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -689,11 +689,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -810,18 +810,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -831,11 +831,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -952,18 +952,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -973,11 +973,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -1094,7 +1094,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) @@ -1236,7 +1236,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) @@ -1646,12 +1646,12 @@ STAGE PLANS: keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -1661,11 +1661,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -1788,12 +1788,12 @@ STAGE PLANS: keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: 
COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -1803,11 +1803,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -1924,18 +1924,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5246 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -1945,11 +1945,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -2066,18 +2066,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5246 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2087,11 +2087,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -2208,18 +2208,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 5246 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 525 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2229,11 +2229,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -2350,18 +2350,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 5246 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 525 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2371,11 +2371,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -2499,18 +2499,18 @@ STAGE PLANS: 0 
_col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -2520,11 +2520,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash @@ -2648,18 +2648,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -2669,11 +2669,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out 
b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index ee645410b9..799de7329f 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -40,12 +40,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -79,12 +79,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -96,11 +96,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -208,12 +208,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -247,12 +247,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 
23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -264,11 +264,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -377,12 +377,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -413,12 +413,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -430,11 +430,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 205 Data size: 20697 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 205 Data size: 20697 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -543,12 +543,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -579,12 +579,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -596,11 +596,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 205 Data size: 20697 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 205 Data size: 20697 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -705,12 +705,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -745,12 +745,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -762,11 +762,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), 
sum(_col2), sum(_col3) mode: hash @@ -871,12 +871,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -911,12 +911,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -928,11 +928,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -1041,12 +1041,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1081,12 +1081,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -1098,11 +1098,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 
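Every changed line in these hunks is a Statistics estimate; the operators themselves are untouched, so the churn records the effect of the NDV-estimator swap, not a plan change. Key-level row counts move (205 → 250 here, 217 → 262 below), and Data size tracks them exactly because it is rows times average row width: 19475/205 = 23750/250 = 95 bytes, and 17835/205 = 21750/250 = 87 bytes. Join cardinalities then move because the classic estimate divides by the larger key NDV. Below is a minimal sketch of that dependency; all names are illustrative, not Hive's, and Hive's actual annotation rules fold in more than this single ratio:

```java
// Illustrative only: how a per-column NDV drives the classic inner-join
// row estimate |R JOIN S| ~= (|R| * |S|) / max(ndv_R(k), ndv_S(k)).
// Class and method names are assumptions, not Hive APIs.
public final class JoinCardinalitySketch {

  static long estimateInnerJoinRows(long leftRows, long leftNdv,
                                    long rightRows, long rightNdv) {
    long denom = Math.max(1L, Math.max(leftNdv, rightNdv));
    return (leftRows * rightRows) / denom;
  }

  static long estimateDataSize(long rows, long avgRowWidthBytes) {
    // Data size in the plans above is simply rows * average width,
    // e.g. 250 rows * 95 bytes = 23750.
    return rows * avgRowWidthBytes;
  }

  public static void main(String[] args) {
    // Same 500-row x 25-row join; only the key NDV estimate changes,
    // so the expected cardinality in the golden file changes with it.
    System.out.println(estimateInnerJoinRows(500, 205, 25, 14)); // 60
    System.out.println(estimateInnerJoinRows(500, 250, 25, 16)); // 50
  }
}
```

The plans will not always match this back-of-envelope number (some new estimates land on 40 where the toy formula says 50), which is expected: the point is only that every row-count delta in these hunks is downstream of a single NDV delta.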
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 217 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 217 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -1211,12 +1211,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1251,12 +1251,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -1268,11 +1268,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 217 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 217 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -1383,12 +1383,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -1419,12 +1419,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1435,18 +1435,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 217 Data size: 1131 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1456,11 +1456,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -1571,12 +1571,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -1607,12 +1607,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1623,18 +1623,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 217 Data size: 1131 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1644,11 +1644,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -1804,16 +1804,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -1825,11 +1825,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 51 Data size: 13719 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 51 Data size: 13719 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -1985,16 +1985,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -2006,11 +2006,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 51 Data size: 13719 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 51 Data size: 13719 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out index f1af97d98d..611ba06b0d 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out @@ -101,16 +101,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -122,11 +122,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash @@ -161,18 +161,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -182,12 +182,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -318,16 +318,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -339,11 +339,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash @@ -378,18 +378,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 
(type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -399,12 +399,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -482,11 +482,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 3 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -496,11 +496,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Reducer 5 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash @@ -553,18 +553,18 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 6 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -610,12 +610,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -746,16 +746,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -767,11 +767,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col2) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash @@ -806,18 +806,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -827,12 +827,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -963,16 +963,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, 
_col2 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -984,11 +984,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col2) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash @@ -1023,18 +1023,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -1044,12 +1044,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -1127,11 +1127,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 3 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1141,11 +1141,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 input vertices: 1 Reducer 5 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col2) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2) mode: hash @@ -1198,18 +1198,18 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 6 - Statistics: Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1255,12 +1255,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out index 9b71d3ec82..177cc97016 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out @@ -116,18 +116,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 44 Data size: 3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE 
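The group-by estimates in correlationoptimizer6.q.out are the clearest view of the estimator itself, because a hash-mode Group By's output row estimate is essentially the estimated NDV of the grouping key, capped at the input row count: 14 → 16 in the hunk this note sits inside, and 205 → 309 in the larger aggregations that follow, are two sketches disagreeing on the same columns while the per-row width stays put (1316/14 = 1504/16 = 94 bytes; 19475/205 = 29355/309 = 95 bytes). For reference, here is a minimal sketch of the standard HyperLogLog estimate such a dense register feeds, assuming textbook constants; it mirrors the register bookkeeping added earlier in this patch but is not its exact code:

```java
// Minimal sketch of the raw HyperLogLog estimate with the usual
// small-range (linear counting) correction; names are illustrative.
public final class HllEstimateSketch {

  static double estimate(byte[] register) {
    int m = register.length;                  // number of registers (2^p)
    double alphaM = 0.7213 / (1 + 1.079 / m); // bias constant, valid for m >= 128
    double sumInvPow2 = 0;
    int zeros = 0;
    for (byte r : register) {
      sumInvPow2 += Math.pow(2, -r);          // harmonic-mean accumulator
      if (r == 0) zeros++;
    }
    double e = alphaM * m * (double) m / sumInvPow2;
    // Linear counting kicks in for small cardinalities, where the raw
    // estimate is biased.
    if (e <= 2.5 * m && zeros != 0) {
      e = m * Math.log((double) m / zeros);
    }
    return e;
  }
}
```

Different estimators land on different integers for the same data, and each such integer is copied verbatim into these `.q.out` expectations — hence the bulk of this patch being mechanical golden-file updates.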
+ Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -137,12 +137,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -154,10 +154,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 14 Data size: 2646 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 14 Data size: 2646 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -172,18 +172,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 8 Execution mode: llap @@ -193,12 +193,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -360,18 +360,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 44 Data size: 3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -381,12 +381,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -398,10 +398,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 14 Data size: 2646 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 14 Data size: 2646 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -416,18 +416,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 8 Execution mode: llap @@ -437,12 +437,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -538,18 +538,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 44 Data size: 
3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -593,18 +593,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 6 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -635,12 +635,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 5 Execution mode: llap @@ -650,7 +650,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -660,10 +660,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 14 Data size: 2646 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 14 Data size: 2646 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -769,12 +769,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -788,10 +788,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -804,12 +804,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -910,12 +910,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -929,10 +929,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3620 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3620 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -945,12 +945,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -1105,18 +1105,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 5 Execution mode: llap @@ -1126,12 +1126,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -1286,18 +1286,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 5 Execution mode: llap @@ -1307,12 +1307,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -1449,18 +1449,18 @@ STAGE PLANS: 0 _col0 (type: 
string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 44 Data size: 3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -1470,12 +1470,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1487,10 +1487,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 34 Data size: 6154 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 34 Data size: 6154 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1652,18 +1652,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 44 Data size: 3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -1673,12 +1673,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1690,10 +1690,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 34 Data size: 6154 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 34 Data size: 6154 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1864,14 +1864,14 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1886,18 +1886,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1907,12 +1907,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -2082,14 +2082,14 @@ STAGE PLANS: 
1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2104,18 +2104,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -2125,12 +2125,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -2298,18 +2298,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2319,12 +2319,12 @@ STAGE PLANS: keys: 
KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -2338,14 +2338,14 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2516,18 +2516,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2537,12 +2537,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -2556,14 +2556,14 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 10860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2679,12 +2679,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2738,12 +2738,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2755,7 +2755,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 12 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), sum(_col3) keys: _col0 (type: string) @@ -2793,14 +2793,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 2 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 281 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 281 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2933,12 +2933,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2992,12 +2992,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -3009,7 +3009,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 12 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), sum(_col3) keys: _col0 (type: string) @@ -3047,14 +3047,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 2 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 281 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 281 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3260,18 +3260,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 44 Data size: 3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -3281,12 +3281,12 @@ 
STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -3298,21 +3298,21 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) sort order: +++++ - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3327,18 +3327,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 9 Execution mode: llap @@ -3348,12 +3348,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Stage: Stage-0 @@ -3516,18 +3516,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 44 Data size: 3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -3537,12 +3537,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -3554,10 +3554,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3572,18 +3572,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data 
size: 75144 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -3593,12 +3593,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Stage: Stage-0 @@ -3694,18 +3694,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 44 Data size: 3784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3749,18 +3749,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 6 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3791,12 +3791,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 5 Execution mode: llap @@ -3806,7 +3806,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: 
string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 609 Data size: 113274 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 75144 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3816,10 +3816,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: 0 Reducer 2 - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 41 Data size: 11480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out index 6d411365b8..347ae3780d 100644 --- a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out +++ b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out @@ -28,12 +28,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -43,7 +43,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) mode: hash @@ -115,12 +115,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -132,7 +132,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), count(_col0) mode: partial2 @@ -204,12 +204,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 205 Data size: 93275 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 113750 Basic stats: COMPLETE Column 
stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 93275 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 113750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col3 (type: string) Execution mode: llap LLAP IO: no inputs @@ -221,7 +221,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 93275 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 113750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), count(_col0), min(_col2) mode: partial2 @@ -293,12 +293,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 205 Data size: 145755 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 145755 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -310,7 +310,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 205 Data size: 145755 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), count(_col0), min(_col2), avg(_col3) mode: partial2 @@ -382,12 +382,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -399,7 +399,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), count(_col0) mode: partial2 @@ -483,12 +483,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 205 Data size: 96555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 96555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 117750 Basic 
stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col4 (type: string), _col5 (type: string) Execution mode: llap LLAP IO: no inputs @@ -500,7 +500,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 205 Data size: 96555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), count(_col2), count(_col0), max(_col3), max(_col4) mode: partial2 @@ -584,12 +584,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 205 Data size: 35875 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 35875 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -601,7 +601,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 35875 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), count(_col0), stddev(_col2) mode: partial2 @@ -704,12 +704,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 214 Data size: 243104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 243104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -721,7 +721,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 214 Data size: 243104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), avg(_col2), count(_col0), max(_col3), min(_col4), std(_col5), stddev_samp(_col6), variance(_col7), var_samp(_col8) mode: partial2 @@ -831,14 +831,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 214 Data size: 101650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 118750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: double) outputColumnNames: _col0, 
_col1, _col2, _col3 - Statistics: Num rows: 214 Data size: 82176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 214 Data size: 82176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/cross_join.q.out b/ql/src/test/results/clientpositive/llap/cross_join.q.out index ae3f9bf6f5..9d664af47e 100644 --- a/ql/src/test/results/clientpositive/llap/cross_join.q.out +++ b/ql/src/test/results/clientpositive/llap/cross_join.q.out @@ -200,10 +200,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -380,10 +380,10 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 2 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 7fd7e20e15..2eedb6efb3 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -214,7 +214,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -327,7 +327,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -346,7 +346,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: 
count() mode: hash @@ -375,7 +375,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -452,12 +452,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator Target column: ds (string) Target Input: srcpart_small Partition key expr: ds - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 4 Execution mode: llap LLAP IO: all inputs @@ -487,7 +487,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -558,21 +558,21 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + alias: srcpart_date + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cstring (type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -598,21 +598,21 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: cstring (type: string) outputColumnNames: _col0 - 
Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -626,7 +626,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 320 Data size: 2560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 262 Data size: 2096 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -692,35 +692,48 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Map 7 <- Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_key_min) AND DynamicValue(RS_11_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + alias: srcpart_date + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_key_min) AND DynamicValue(RS_11_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cstring (type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: srcpart_small @@ -743,7 +756,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -753,37 +766,24 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_key_min) AND DynamicValue(RS_9_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_key_min) AND DynamicValue(RS_9_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: cstring (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -797,7 +797,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 320 Data size: 2560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 262 Data size: 2096 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -822,33 +822,33 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=618) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 552 Basic 
stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 @@ -1058,7 +1058,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1071,7 +1071,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=36) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1119,7 +1119,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1131,7 +1131,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=36) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1254,12 +1254,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1269,7 +1269,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1388,7 +1388,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1428,18 +1428,18 @@ STAGE 
PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=42) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1456,7 +1456,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1485,7 +1485,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=42) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1497,7 +1497,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1697,7 +1697,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1814,7 +1814,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1863,7 +1863,7 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1919,7 +1919,7 @@ STAGE PLANS: 1 _col0 (type: 
string) input vertices: 1 Map 3 - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2030,7 +2030,7 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2143,12 +2143,12 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 2 - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Map 2 @@ -2192,7 +2192,7 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 0 Map 1 - Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2287,12 +2287,12 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 2 - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 11739 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Map 2 @@ -2336,7 +2336,7 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 0 Map 1 - Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2437,10 +2437,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Union 3 - Statistics: Num rows: 3314 Data size: 385334 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4097 Data size: 530189 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 3314 Data size: 385334 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4097 Data size: 530189 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out index c4b18b7118..a73faebcb7 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out @@ -172,7 +172,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) 
Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_29] (rows=195 width=8) + Merge Join Operator [MERGEJOIN_29] (rows=129 width=8) Conds:RS_6._col0=RS_7._col0(Inner) <-Map 4 [SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] @@ -195,11 +195,11 @@ Stage-0 <-Reducer 5 [BROADCAST_EDGE] llap BROADCAST [RS_23] Group By Operator [GBY_22] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40)"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_21] Group By Operator [GBY_20] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"] + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40)"] Select Operator [SEL_19] (rows=20 width=87) Output:["_col0"] Please refer to the previous Select Operator [SEL_5] @@ -245,7 +245,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_22] (rows=195 width=8) + Merge Join Operator [MERGEJOIN_22] (rows=129 width=8) Conds:RS_6._col0=RS_7._col0(Inner) <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] @@ -256,8 +256,8 @@ Stage-0 predicate:key is not null TableScan [TS_0] (rows=2000 width=87) default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - Dynamic Partitioning Event Operator [EVENT_21] (rows=205 width=87) - Group By Operator [GBY_20] (rows=205 width=87) + Dynamic Partitioning Event Operator [EVENT_21] (rows=309 width=87) + Group By Operator [GBY_20] (rows=309 width=87) Output:["_col0"],keys:_col0 Select Operator [SEL_19] (rows=2000 width=87) Output:["_col0"] @@ -311,17 +311,17 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_15] Group By Operator [GBY_14] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_28] (rows=320 width=8) + Merge Join Operator [MERGEJOIN_28] (rows=262 width=8) Conds:RS_9._col0=RS_10._col0(Inner),RS_10._col0=RS_11._col0(Inner) <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9174 width=70) + Select Operator [SEL_2] (rows=2000 width=87) Output:["_col0"] - Filter Operator [FIL_25] (rows=9174 width=70) - predicate:cstring is not null - TableScan [TS_0] (rows=12288 width=70) - default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] + Filter Operator [FIL_25] (rows=2000 width=87) + predicate:key is not null + TableScan [TS_0] (rows=2000 width=87) + default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 @@ -334,12 +334,12 @@ Stage-0 <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col0 - Select Operator [SEL_8] (rows=2000 width=87) + Select Operator [SEL_8] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_27] (rows=2000 width=87) - predicate:key is not null - TableScan [TS_6] (rows=2000 width=87) - default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Filter Operator [FIL_27] (rows=9174 width=70) + predicate:cstring is not null + TableScan [TS_6] (rows=12288 width=70) + default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] PREHOOK: query: select count(*) from srcpart_date 
join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) PREHOOK: type: QUERY @@ -369,12 +369,12 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Map 6 <- Reducer 5 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Map 1 <- Reducer 6 (BROADCAST_EDGE) +Map 7 <- Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -388,60 +388,60 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_15] Group By Operator [GBY_14] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_46] (rows=320 width=8) + Merge Join Operator [MERGEJOIN_46] (rows=262 width=8) Conds:RS_9._col0=RS_10._col0(Inner),RS_10._col0=RS_11._col0(Inner) - <-Map 4 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) - Output:["_col0"] - Filter Operator [FIL_26] (rows=20 width=87) - predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) - default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"] - <-Map 6 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_11] + <-Map 1 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_8] (rows=2000 width=87) + Select Operator [SEL_2] (rows=2000 width=87) Output:["_col0"] - Filter Operator [FIL_27] (rows=2000 width=87) + Filter Operator [FIL_25] (rows=2000 width=87) predicate:(key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) - TableScan [TS_6] (rows=2000 width=87) + TableScan [TS_0] (rows=2000 width=87) default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 5 [BROADCAST_EDGE] llap - BROADCAST [RS_45] + <-Reducer 6 [BROADCAST_EDGE] llap + BROADCAST [RS_32] Group By Operator [GBY_31] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"] - <-Map 4 [CUSTOM_SIMPLE_EDGE] llap + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"] + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40)"] Select Operator [SEL_28] (rows=20 width=87) Output:["_col0"] - Please refer to the previous Select Operator [SEL_5] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + Select Operator [SEL_5] (rows=20 width=87) + Output:["_col0"] + Filter Operator [FIL_26] (rows=20 width=87) + predicate:key1 is not null + TableScan [TS_3] (rows=20 width=87) + default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"] + <-Map 5 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_10] PartitionCols:_col0 - Select 
Operator [SEL_2] (rows=9174 width=70) + Please refer to the previous Select Operator [SEL_5] + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_25] (rows=9174 width=70) - predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_key_min) AND DynamicValue(RS_11_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_key_bloom_filter)))) - TableScan [TS_0] (rows=12288 width=70) + Filter Operator [FIL_27] (rows=9174 width=70) + predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_key_min) AND DynamicValue(RS_9_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) + TableScan [TS_6] (rows=12288 width=70) default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] - <-Reducer 5 [BROADCAST_EDGE] llap - BROADCAST [RS_32] + <-Reducer 6 [BROADCAST_EDGE] llap + BROADCAST [RS_45] Please refer to the previous Group By Operator [GBY_31] - <-Reducer 7 [BROADCAST_EDGE] llap - BROADCAST [RS_37] - Group By Operator [GBY_36] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=410)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_35] - Group By Operator [GBY_34] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=410)"] - Select Operator [SEL_33] (rows=2000 width=87) + <-Reducer 4 [BROADCAST_EDGE] llap + BROADCAST [RS_44] + Group By Operator [GBY_41] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=618)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_40] + Group By Operator [GBY_39] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=618)"] + Select Operator [SEL_38] (rows=2000 width=87) Output:["_col0"] - Please refer to the previous Select Operator [SEL_8] + Please refer to the previous Select Operator [SEL_2] PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) PREHOOK: type: QUERY @@ -574,22 +574,22 @@ Stage-0 <-Reducer 5 [BROADCAST_EDGE] llap BROADCAST [RS_23] Group By Operator [GBY_22] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40)"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_21] Group By Operator [GBY_20] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"] + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40)"] Select Operator [SEL_19] (rows=20 width=87) Output:["_col0"] Please refer to the previous Select Operator [SEL_5] <-Reducer 6 [BROADCAST_EDGE] llap BROADCAST [RS_28] Group By Operator [GBY_27] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=36)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40)"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_26] Group By Operator [GBY_25] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=36)"] + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40)"] Select Operator [SEL_24] (rows=20 width=91) Output:["_col0"] Please refer to the previous Select Operator [SEL_5] @@ -636,7 +636,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_17] Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_30] (rows=319 width=8) + Merge Join Operator [MERGEJOIN_30] (rows=261 width=8) Conds:RS_12._col1=RS_13._col0(Inner) <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_13] @@ -650,7 +650,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_29] (rows=195 width=91) + Merge Join Operator [MERGEJOIN_29] (rows=129 width=91) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] @@ -719,12 +719,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_17] Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_50] (rows=319 width=8) + Merge Join Operator [MERGEJOIN_50] (rows=261 width=8) Conds:RS_12._col1=RS_13._col0(Inner) <-Reducer 2 [SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_49] (rows=195 width=91) + Merge Join Operator [MERGEJOIN_49] (rows=129 width=91) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] <-Map 6 [SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_10] @@ -747,11 +747,11 @@ Stage-0 <-Reducer 7 [BROADCAST_EDGE] llap BROADCAST [RS_33] Group By Operator [GBY_32] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_31] Group By Operator [GBY_30] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"] + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40)"] Select Operator [SEL_29] (rows=20 width=87) Output:["_col0"] Please refer to the previous Select Operator [SEL_5] @@ -767,12 +767,12 @@ Stage-0 <-Reducer 5 [BROADCAST_EDGE] llap BROADCAST [RS_48] Group By Operator [GBY_47] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=42)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=40)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_46] Group By Operator [GBY_45] (rows=1 width=552) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=42)"] - Select Operator [SEL_44] (rows=195 width=91) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40)"] + Select Operator [SEL_44] (rows=129 width=91) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_49] @@ -961,7 +961,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1078,7 +1078,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1127,7 +1127,7 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1167,7 +1167,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_19] (rows=195 width=8) + Map Join Operator [MAPJOIN_19] (rows=129 width=8) Conds:SEL_2._col0=RS_7._col0(Inner) <-Map 3 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -1226,7 +1226,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_29] (rows=195 width=8) + Map Join Operator [MAPJOIN_29] (rows=129 width=8) Conds:SEL_2._col0=RS_7._col0(Inner) <-Map 3 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -1286,12 +1286,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_17] Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_30] (rows=319 width=8) + Map Join Operator [MAPJOIN_30] (rows=261 width=8) Conds:RS_12._col1=SEL_8._col0(Inner) <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_12] PartitionCols:_col1 - Map Join Operator [MAPJOIN_29] (rows=195 width=91) + Map Join Operator [MAPJOIN_29] (rows=129 width=91) Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col1"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_10] @@ -1359,12 +1359,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_17] Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_50] (rows=319 width=8) + Map Join Operator [MAPJOIN_50] (rows=261 width=8) Conds:RS_12._col1=SEL_8._col0(Inner) <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_12] PartitionCols:_col1 - Map Join Operator [MAPJOIN_49] (rows=195 width=91) + Map Join Operator [MAPJOIN_49] (rows=129 width=91) Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col1"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST 
[RS_10] @@ -1432,7 +1432,7 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_15] - Map Join Operator [MAPJOIN_33] (rows=3314 width=116) + Map Join Operator [MAPJOIN_33] (rows=4097 width=129) Conds:SEL_2._col1=Union 3._col0(Inner),Output:["_col0","_col1","_col2"] <-Union 3 [BROADCAST_EDGE] <-Map 2 [CONTAINS] llap diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index d4811d64d7..90756c6776 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -1624,12 +1624,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1641,14 +1641,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), 'day' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1754,12 +1754,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1771,14 +1771,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), 'day' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 87b8cae445..eb1268cadf 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -397,9 +397,9 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_17] (rows=2 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_16] (rows=2 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_15] @@ -926,9 +926,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_8] - Select Operator [SEL_7] (rows=100 width=7) + Select Operator [SEL_7] (rows=80 width=7) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_11] (rows=100 width=7) + Merge Join Operator [MERGEJOIN_11] (rows=80 width=7) Conds:RS_4._col0=RS_5._col0(Left Outer),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_4] @@ -960,9 +960,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_8] - Select Operator [SEL_7] (rows=100 width=7) + Select Operator [SEL_7] (rows=80 width=7) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_9] (rows=100 width=7) + Merge Join Operator [MERGEJOIN_9] (rows=80 width=7) Conds:RS_4._col0=RS_5._col0(Outer),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_4] @@ -994,9 +994,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=291 width=100) + Select Operator [SEL_13] (rows=194 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_24] (rows=291 width=100) + Merge Join Operator [MERGEJOIN_24] (rows=194 width=100) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] @@ -1041,9 +1041,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=291 width=177) + Select Operator [SEL_13] (rows=194 width=177) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_24] (rows=291 width=177) + Merge Join Operator [MERGEJOIN_24] (rows=194 width=177) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col0","_col1","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] @@ -1088,9 +1088,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=72 width=101) + Select Operator [SEL_13] (rows=40 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_24] (rows=72 width=101) + Merge Join Operator [MERGEJOIN_24] (rows=40 width=101) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] 
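
The row-count and expectedEntries shifts in these golden files all trace back to the planner's per-column NDV (number of distinct values) estimates: the bloom_filter() build is sized from the estimated distinct key count, a group-by's output cardinality is capped by the key NDV, and an equi-join estimate divides the cross product by the larger key NDV, so swapping in a new NDV estimator moves every downstream number at once. A minimal sketch of those computations follows, with keyNdv as a hypothetical stand-in for a column-statistics lookup; it shows the textbook shape of the math, not a copy of Hive's StatsRulesProcFactory logic:

    // Illustrative cardinality/sizing math driven by an NDV estimate.
    // keyNdv is a hypothetical stand-in for a column-statistics lookup.
    final class NdvDrivenEstimates {

      // A semijoin bloom filter only needs capacity for the distinct build-side keys.
      static long expectedEntries(long keyNdv) {
        return Math.max(keyNdv, 1L);
      }

      // Standard bloom filter sizing: bits for n entries at false-positive rate p.
      static long optimalNumOfBits(long n, double p) {
        return (long) Math.ceil(-n * Math.log(p) / (Math.log(2) * Math.log(2)));
      }

      // A group-by emits at most one row per distinct key.
      static long groupByRows(long inputRows, long keyNdv) {
        return Math.min(inputRows, Math.max(keyNdv, 1L));
      }

      // Selinger-style inner equi-join estimate: cross product over the larger NDV.
      static long innerJoinRows(long lRows, long rRows, long lNdv, long rNdv) {
        return (lRows * rRows) / Math.max(Math.max(lNdv, rNdv), 1L);
      }
    }

Under formulas of this shape, a single revised NDV propagates through every operator above it, which is exactly the pattern in the surrounding hunks: join estimates such as 100 -> 80 and 291 -> 194, group-by outputs such as 205 -> 250, and bloom sizes such as 32/36/42 -> 40 all move together without any change to the underlying data.
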
@@ -1135,9 +1135,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=72 width=101) + Select Operator [SEL_13] (rows=40 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_24] (rows=72 width=101) + Merge Join Operator [MERGEJOIN_24] (rows=40 width=101) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] @@ -1432,7 +1432,7 @@ Stage-0 <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=5 width=85) + Group By Operator [GBY_7] (rows=6 width=85) Output:["_col0"],keys:_col0 Select Operator [SEL_5] (rows=18 width=80) Output:["_col0"] @@ -1456,9 +1456,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_18] - Select Operator [SEL_17] (rows=16 width=93) + Select Operator [SEL_17] (rows=14 width=93) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_28] (rows=16 width=93) + Merge Join Operator [MERGEJOIN_28] (rows=14 width=93) Conds:RS_13._col0=RS_14._col0(Left Semi),RS_13._col0=RS_15._col0(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] @@ -1472,7 +1472,7 @@ Stage-0 <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=3 width=85) + Group By Operator [GBY_10] (rows=4 width=85) Output:["_col0"],keys:_col0 Select Operator [SEL_5] (rows=9 width=85) Output:["_col0"] @@ -1483,7 +1483,7 @@ Stage-0 <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Group By Operator [GBY_12] (rows=6 width=85) + Group By Operator [GBY_12] (rows=7 width=85) Output:["_col0"],keys:_col0 Select Operator [SEL_8] (rows=18 width=80) Output:["_col0"] @@ -1682,9 +1682,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_19] - Select Operator [SEL_18] (rows=332 width=178) + Select Operator [SEL_18] (rows=366 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=332 width=179) + Filter Operator [FIL_17] (rows=366 width=179) predicate:_col3 is null Merge Join Operator [MERGEJOIN_22] (rows=500 width=179) Conds:RS_14._col1=RS_15._col0(Left Outer),Output:["_col0","_col1","_col3"] @@ -1698,9 +1698,9 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_13] (rows=72 width=95) + Select Operator [SEL_13] (rows=83 width=95) Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=72 width=91) + Group By Operator [GBY_12] (rows=83 width=91) Output:["_col0"],keys:_col1 Select Operator [SEL_8] (rows=83 width=178) Output:["_col1"] @@ -1911,7 +1911,7 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=166 width=178) + Merge Join Operator [MERGEJOIN_17] (rows=133 width=178) Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] @@ -1925,7 +1925,7 @@ Stage-0 <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=69 width=87) + Group By Operator [GBY_7] (rows=83 width=87) Output:["_col0"],keys:_col0 Select Operator [SEL_5] (rows=166 width=87) Output:["_col0"] @@ -1957,9 +1957,9 @@ Stage-0 Stage-1 Reducer 4 llap File Output Operator [FS_22] - Select Operator [SEL_21] (rows=5 width=8) + Select Operator [SEL_21] (rows=7 width=8) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_32] (rows=5 width=8) + 
Merge Join Operator [MERGEJOIN_32] (rows=7 width=8) Conds:RS_18._col1, _col4=RS_19._col0, _col1(Left Semi),Output:["_col0","_col3"] <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_19] @@ -1975,14 +1975,14 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_18] PartitionCols:_col1, _col4 - Merge Join Operator [MERGEJOIN_31] (rows=5 width=16) + Merge Join Operator [MERGEJOIN_31] (rows=7 width=16) Conds:RS_13._col0=RS_14._col1(Inner),Output:["_col0","_col1","_col3","_col4"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col1 - Select Operator [SEL_9] (rows=17 width=16) + Select Operator [SEL_9] (rows=14 width=16) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_29] (rows=17 width=16) + Filter Operator [FIL_29] (rows=14 width=16) predicate:((l_linenumber = 1) and l_partkey is not null) TableScan [TS_7] (rows=100 width=16) default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"] @@ -2027,26 +2027,26 @@ Stage-0 Stage-1 Reducer 4 llap File Output Operator [FS_31] - Merge Join Operator [MERGEJOIN_44] (rows=34 width=186) + Merge Join Operator [MERGEJOIN_44] (rows=32 width=186) Conds:RS_27._col2=RS_28._col0(Left Semi),Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_27] PartitionCols:_col2 - Filter Operator [FIL_37] (rows=83 width=186) + Filter Operator [FIL_37] (rows=66 width=186) predicate:_col2 is not null - Group By Operator [GBY_14] (rows=83 width=186) + Group By Operator [GBY_14] (rows=66 width=186) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=83 width=186) + Group By Operator [GBY_12] (rows=66 width=186) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Merge Join Operator [MERGEJOIN_43] (rows=166 width=178) + Merge Join Operator [MERGEJOIN_43] (rows=133 width=178) Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=69 width=87) + Group By Operator [GBY_7] (rows=83 width=87) Output:["_col0"],keys:_col0 Select Operator [SEL_5] (rows=166 width=87) Output:["_col0"] @@ -2066,20 +2066,20 @@ Stage-0 <-Reducer 6 [SIMPLE_EDGE] llap SHUFFLE [RS_28] PartitionCols:_col0 - Group By Operator [GBY_26] (rows=34 width=8) + Group By Operator [GBY_26] (rows=41 width=8) Output:["_col0"],keys:_col0 - Select Operator [SEL_24] (rows=69 width=8) + Select Operator [SEL_24] (rows=83 width=8) Output:["_col0"] - Filter Operator [FIL_40] (rows=69 width=8) + Filter Operator [FIL_40] (rows=83 width=8) predicate:_col1 is not null - Select Operator [SEL_42] (rows=69 width=8) + Select Operator [SEL_42] (rows=83 width=8) Output:["_col1"] - Group By Operator [GBY_22] (rows=69 width=95) + Group By Operator [GBY_22] (rows=83 width=95) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Group By Operator [GBY_20] (rows=69 width=95) + Group By Operator [GBY_20] (rows=83 width=95) Output:["_col0","_col1"],aggregations:["count()"],keys:key Filter Operator [FIL_41] (rows=166 width=87) predicate:(key > '9') @@ -2212,14 +2212,14 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] llap SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_15] (rows=69 width=91) + Select Operator [SEL_15] (rows=83 width=91) Output:["_col0","_col1"] - Group By Operator [GBY_14] (rows=69 
width=87) + Group By Operator [GBY_14] (rows=83 width=87) Output:["_col0"],keys:KEY._col0 <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Group By Operator [GBY_12] (rows=69 width=87) + Group By Operator [GBY_12] (rows=83 width=87) Output:["_col0"],keys:key Filter Operator [FIL_29] (rows=166 width=87) predicate:(key > '2') @@ -2632,16 +2632,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_16] Group By Operator [GBY_15] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_13] (rows=14 width=94) + Select Operator [SEL_13] (rows=16 width=94) Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=14 width=94) + Group By Operator [GBY_12] (rows=16 width=94) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=14 width=94) + Group By Operator [GBY_10] (rows=16 width=94) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_24] (rows=60 width=86) + Merge Join Operator [MERGEJOIN_24] (rows=40 width=86) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] @@ -2693,16 +2693,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_16] Group By Operator [GBY_15] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_13] (rows=14 width=94) + Select Operator [SEL_13] (rows=16 width=94) Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=14 width=94) + Group By Operator [GBY_12] (rows=16 width=94) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=14 width=94) + Group By Operator [GBY_10] (rows=16 width=94) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_24] (rows=60 width=86) + Merge Join Operator [MERGEJOIN_24] (rows=40 width=86) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] @@ -2754,16 +2754,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_16] Group By Operator [GBY_15] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_13] (rows=14 width=94) + Select Operator [SEL_13] (rows=16 width=94) Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=14 width=94) + Group By Operator [GBY_12] (rows=16 width=94) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=14 width=94) + Group By Operator [GBY_10] (rows=16 width=94) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Map Join Operator [MAPJOIN_24] (rows=60 width=86) + Map Join Operator [MAPJOIN_24] (rows=40 width=86) Conds:RS_6._col0=SEL_5._col0(Inner),Output:["_col0"] <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_6] @@ -2812,16 +2812,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_18] Group By Operator [GBY_17] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_15] (rows=12 width=94) + Select Operator [SEL_15] (rows=10 width=94) Output:["_col0","_col1"] - Group By Operator [GBY_14] (rows=12 width=94) + Group By Operator [GBY_14] (rows=10 width=94) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Group 
By Operator [GBY_12] (rows=12 width=94) + Group By Operator [GBY_12] (rows=10 width=94) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_26] (rows=25 width=86) + Merge Join Operator [MERGEJOIN_26] (rows=20 width=86) Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] @@ -2835,7 +2835,7 @@ Stage-0 <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=205 width=87) + Group By Operator [GBY_7] (rows=250 width=87) Output:["_col0"],keys:_col0 Select Operator [SEL_5] (rows=500 width=87) Output:["_col0"] @@ -4157,23 +4157,23 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_16] - Select Operator [SEL_14] (rows=29 width=227) + Select Operator [SEL_14] (rows=27 width=227) Output:["_col0","_col1","_col2","_col3"] - PTF Operator [PTF_13] (rows=29 width=223) + PTF Operator [PTF_13] (rows=27 width=223) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_12] (rows=29 width=223) + Select Operator [SEL_12] (rows=27 width=223) Output:["_col1","_col2","_col5"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col2 - PTF Operator [PTF_10] (rows=29 width=223) + PTF Operator [PTF_10] (rows=27 width=223) Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}}] - Select Operator [SEL_9] (rows=29 width=223) + Select Operator [SEL_9] (rows=27 width=223) Output:["_col1","_col2","_col5"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:_col2 - Map Join Operator [MAPJOIN_21] (rows=29 width=223) + Map Join Operator [MAPJOIN_21] (rows=27 width=223) Conds:FIL_19.p_partkey=RS_5.p_partkey(Inner),Output:["_col1","_col2","_col5"] <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_5] @@ -4375,7 +4375,7 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_11] - Map Join Operator [MAPJOIN_16] (rows=29 width=619) + Map Join Operator [MAPJOIN_16] (rows=27 width=619) Conds:FIL_14._col0=RS_8.p_partkey(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 3 [BROADCAST_EDGE] llap BROADCAST [RS_8] @@ -4697,16 +4697,16 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_15] - Select Operator [SEL_13] (rows=29 width=259) + Select Operator [SEL_13] (rows=27 width=259) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - PTF Operator [PTF_12] (rows=29 width=767) + PTF Operator [PTF_12] (rows=27 width=767) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_11] (rows=29 width=767) + Select Operator [SEL_11] (rows=27 width=767) Output:["_col1","_col2","_col5","_col7"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col2 - Map Join Operator [MAPJOIN_20] (rows=29 width=231) + Map Join Operator [MAPJOIN_20] (rows=27 width=231) Conds:FIL_18._col0=RS_8.p_partkey(Inner),Output:["_col1","_col2","_col5","_col7"] <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_8] @@ -5257,9 +5257,9 @@ Stage-3 Reducer 2 llap File Output Operator [FS_11] table:{"name:":"default.dest_j1"} - Select Operator [SEL_9] (rows=1219 width=95) + Select Operator [SEL_9] (rows=809 width=95) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_16] (rows=1219 width=178) + Merge Join Operator [MERGEJOIN_16] (rows=809 width=178) 
Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] @@ -5492,7 +5492,7 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=1219 width=356) + Merge Join Operator [MERGEJOIN_15] (rows=809 width=356) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index f8a6526c67..4f1a4a1f14 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -192,36 +192,36 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_16] - Select Operator [SEL_15] (rows=141 width=268) + Select Operator [SEL_15] (rows=64 width=268) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_26] (rows=141 width=268) - Conds:RS_12._col3=RS_13._col0(Inner),Output:["_col0","_col3","_col6"] + Merge Join Operator [MERGEJOIN_26] (rows=64 width=268) + Conds:RS_12._col3=RS_13._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_24] (rows=500 width=178) - predicate:key is not null - TableScan [TS_6] (rows=500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_8] (rows=500 width=369) + Output:["_col0"] + Filter Operator [FIL_24] (rows=500 width=91) + predicate:value is not null + TableScan [TS_6] (rows=500 width=91) + default@srcpart,z,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_12] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_25] (rows=58 width=177) - Conds:RS_9._col0=RS_10._col1(Inner),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_25] (rows=40 width=266) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=500 width=369) - Output:["_col0"] - Filter Operator [FIL_22] (rows=500 width=91) - predicate:value is not null - TableScan [TS_0] (rows=500 width=91) - default@srcpart,z,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + Select Operator [SEL_2] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=500 width=178) + predicate:key is not null + TableScan [TS_0] (rows=500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_10] - PartitionCols:_col1 + PartitionCols:_col0 Select Operator [SEL_5] (rows=25 width=175) Output:["_col0","_col1"] Filter Operator [FIL_23] (rows=25 width=175) @@ -288,15 +288,15 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
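
In explainuser_2.q.out the differences are structural rather than purely numeric: the CBO picks join order from the same cardinality estimates, so revised NDVs reorder the vertex dependency graph itself (Reducer 10 now feeds from Reducer 11 and Reducer 9 instead of Map 15 and Map 6, and the scans of src and srcpart swap positions in the join tree). Since the revised NDVs presumably come from the HyperLogLog estimator this patch introduces, the standard HLL estimate is useful background when reading the new numbers; a minimal sketch of the raw estimator over m registers follows (the small- and large-range bias corrections that production implementations apply are omitted):

    // Raw HyperLogLog estimate per Flajolet et al.: a bias-corrected
    // harmonic mean of 2^(-register) over m registers. Illustrative only.
    final class HllSketch {
      static double rawEstimate(byte[] registers) {
        int m = registers.length;
        double sum = 0.0;
        for (byte r : registers) {
          sum += Math.pow(2.0, -r);               // harmonic-mean accumulator
        }
        double alphaM = 0.7213 / (1.0 + 1.079 / m); // asymptotic constant, m >= 128
        return alphaM * m * m / sum;
      }
    }

Because this estimate is probabilistic, golden row counts such as 14 -> 5 in the plan below are expected churn from changing estimators, not evidence on their own of a plan-quality regression.
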
Vertex dependency in root stage -Reducer 10 <- Map 15 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 10 <- Reducer 11 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 10 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 13 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator @@ -304,78 +304,56 @@ Stage-0 Stage-1 Reducer 5 llap File Output Operator [FS_55] - Limit [LIM_54] (rows=14 width=285) + Limit [LIM_54] (rows=5 width=285) Number of rows:100 - Select Operator [SEL_53] (rows=14 width=285) + Select Operator [SEL_53] (rows=5 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_52] - Group By Operator [GBY_50] (rows=14 width=285) + Group By Operator [GBY_50] (rows=5 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=14 width=285) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col12, _col20, _col2 - Merge Join Operator [MERGEJOIN_97] (rows=363 width=534) - Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col12","_col13","_col20","_col21"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_44] - PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_91] (rows=99 width=269) - Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=7 width=178) - Output:["_col0"] - Filter Operator [FIL_84] (rows=7 width=178) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=178) - default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=170 width=356) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=170 width=356) - predicate:(v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_0] (rows=170 width=356) - default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"] - <-Reducer 9 [SIMPLE_EDGE] llap + Group By Operator [GBY_48] (rows=5 width=285) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col11)","count(_col21)","count(_col3)"],keys:_col10, _col20, _col2 + Merge Join Operator [MERGEJOIN_97] (rows=940 width=534) + Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col10","_col11","_col20","_col21"] + <-Reducer 10 [SIMPLE_EDGE] llap SHUFFLE [RS_45] PartitionCols:_col15, _col17 - Select Operator [SEL_40] (rows=180 width=447) - Output:["_col6","_col7","_col14","_col15","_col17"] - Merge Join 
Operator [MERGEJOIN_96] (rows=180 width=447) - Conds:RS_37._col6, _col4=RS_38._col4, _col2(Inner),Output:["_col2","_col3","_col14","_col15","_col17"] - <-Reducer 10 [SIMPLE_EDGE] llap + Select Operator [SEL_40] (rows=336 width=447) + Output:["_col4","_col5","_col14","_col15","_col17"] + Merge Join Operator [MERGEJOIN_96] (rows=336 width=447) + Conds:RS_37._col4, _col2=RS_38._col4, _col2(Inner),Output:["_col0","_col1","_col14","_col15","_col17"] + <-Reducer 11 [SIMPLE_EDGE] llap SHUFFLE [RS_38] PartitionCols:_col4, _col2 - Merge Join Operator [MERGEJOIN_95] (rows=18 width=356) + Merge Join Operator [MERGEJOIN_95] (rows=8 width=356) Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col2","_col3","_col4","_col5"] <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_23] (rows=7 width=178) + Select Operator [SEL_23] (rows=5 width=178) Output:["_col0"] - Filter Operator [FIL_90] (rows=7 width=178) + Filter Operator [FIL_90] (rows=5 width=178) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - Please refer to the previous TableScan [TS_3] + TableScan [TS_3] (rows=500 width=178) + default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 15 [SIMPLE_EDGE] llap SHUFFLE [RS_24] PartitionCols:_col0 - Select Operator [SEL_20] (rows=8 width=531) + Select Operator [SEL_20] (rows=7 width=531) Output:["_col0","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_89] (rows=8 width=534) + Filter Operator [FIL_89] (rows=7 width=534) predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) TableScan [TS_18] (rows=85 width=534) default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] - <-Reducer 8 [SIMPLE_EDGE] llap + <-Reducer 9 [SIMPLE_EDGE] llap SHUFFLE [RS_37] - PartitionCols:_col6, _col4 - Merge Join Operator [MERGEJOIN_94] (rows=40 width=352) - Conds:RS_34._col3=RS_35._col1(Inner),Output:["_col2","_col3","_col4","_col6"] + PartitionCols:_col4, _col2 + Merge Join Operator [MERGEJOIN_94] (rows=42 width=352) + Conds:RS_34._col1=RS_35._col1(Inner),Output:["_col0","_col1","_col2","_col4"] <-Map 14 [SIMPLE_EDGE] llap SHUFFLE [RS_35] PartitionCols:_col1 @@ -385,42 +363,64 @@ Stage-0 predicate:((key = 'src1key') and value is not null) TableScan [TS_15] (rows=25 width=175) default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [SIMPLE_EDGE] llap + <-Reducer 8 [SIMPLE_EDGE] llap SHUFFLE [RS_34] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_93] (rows=40 width=352) - Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 6 [SIMPLE_EDGE] llap + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_93] (rows=42 width=352) + Conds:RS_31._col3=RS_32._col1(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 13 [SIMPLE_EDGE] llap SHUFFLE [RS_32] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=2 width=178) - Output:["_col0"] - Filter Operator [FIL_87] (rows=2 width=178) - predicate:((value = 'd1value') and key is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 12 [SIMPLE_EDGE] llap + PartitionCols:_col1 + Select Operator [SEL_14] (rows=6 width=185) + Output:["_col1"] + Filter Operator [FIL_87] (rows=6 width=178) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_12] (rows=2000 width=178) + default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] llap SHUFFLE [RS_31] - PartitionCols:_col2 - Merge Join Operator 
[MERGEJOIN_92] (rows=40 width=352) - Conds:RS_28._col1=RS_29._col3(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=10 width=185) - Output:["_col1"] - Filter Operator [FIL_85] (rows=10 width=178) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=178) - default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 13 [SIMPLE_EDGE] llap + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_92] (rows=7 width=443) + Conds:RS_28._col0=RS_29._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_29] - PartitionCols:_col3 - Select Operator [SEL_11] (rows=8 width=531) + PartitionCols:_col0 + Select Operator [SEL_11] (rows=2 width=178) + Output:["_col0"] + Filter Operator [FIL_86] (rows=2 width=178) + predicate:((value = 'd1value') and key is not null) + Please refer to the previous TableScan [TS_3] + <-Map 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_28] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=7 width=531) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_86] (rows=8 width=534) + Filter Operator [FIL_85] (rows=7 width=534) predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_9] (rows=85 width=534) + TableScan [TS_6] (rows=85 width=534) default@ss,ss,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_44] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_91] (rows=70 width=269) + Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=5 width=178) + Output:["_col0"] + Filter Operator [FIL_84] (rows=5 width=178) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=170 width=356) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_83] (rows=170 width=356) + predicate:(v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_0] (rows=170 width=356) + default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"] PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -469,9 +469,9 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_55] (rows=28 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_51] (rows=73 width=177) + Select Operator [SEL_51] (rows=33 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_85] (rows=73 width=177) + Merge Join Operator [MERGEJOIN_85] (rows=33 width=177) Conds:RS_48._col2=RS_49._col0(Inner),Output:["_col1","_col2"] <-Map 17 [SIMPLE_EDGE] llap SHUFFLE [RS_49] @@ -485,7 +485,7 @@ Stage-0 <-Reducer 10 [SIMPLE_EDGE] llap SHUFFLE [RS_48] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_84] (rows=30 width=177) + Merge Join Operator [MERGEJOIN_84] (rows=21 width=177) Conds:RS_45._col1=RS_46._col1(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] llap SHUFFLE [RS_46] @@ -531,9 +531,9 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_55] (rows=28 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=73 width=177) + Select Operator [SEL_25] (rows=33 width=177) 
Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_83] (rows=73 width=177) + Merge Join Operator [MERGEJOIN_83] (rows=33 width=177) Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col1","_col2"] <-Map 12 [SIMPLE_EDGE] llap SHUFFLE [RS_23] @@ -547,7 +547,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_22] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_82] (rows=30 width=177) + Merge Join Operator [MERGEJOIN_82] (rows=21 width=177) Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] llap SHUFFLE [RS_20] @@ -643,17 +643,17 @@ Stage-0 Stage-1 Reducer 9 llap File Output Operator [FS_122] - Group By Operator [GBY_120] (rows=107 width=177) + Group By Operator [GBY_120] (rows=48 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 8 [SIMPLE_EDGE] <-Reducer 15 [CONTAINS] llap Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=107 width=177) + Group By Operator [GBY_118] (rows=48 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_114] (rows=124 width=177) + Select Operator [SEL_114] (rows=56 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_170] (rows=124 width=177) + Merge Join Operator [MERGEJOIN_170] (rows=56 width=177) Conds:RS_111._col2=RS_112._col0(Inner),Output:["_col2","_col5"] <-Map 16 [SIMPLE_EDGE] llap SHUFFLE [RS_112] @@ -667,7 +667,7 @@ Stage-0 <-Reducer 14 [SIMPLE_EDGE] llap SHUFFLE [RS_111] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_169] (rows=51 width=86) + Merge Join Operator [MERGEJOIN_169] (rows=35 width=86) Conds:RS_108._col1=RS_109._col1(Inner),Output:["_col2"] <-Map 11 [SIMPLE_EDGE] llap SHUFFLE [RS_109] @@ -753,19 +753,19 @@ Stage-0 <-Reducer 7 [CONTAINS] llap Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=107 width=177) + Group By Operator [GBY_118] (rows=48 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_67] (rows=90 width=177) + Group By Operator [GBY_67] (rows=40 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 6 [SIMPLE_EDGE] <-Reducer 13 [CONTAINS] llap Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=90 width=177) + Group By Operator [GBY_65] (rows=40 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_61] (rows=107 width=177) + Select Operator [SEL_61] (rows=48 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_168] (rows=107 width=177) + Merge Join Operator [MERGEJOIN_168] (rows=48 width=177) Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] <-Map 24 [SIMPLE_EDGE] llap SHUFFLE [RS_59] @@ -779,7 +779,7 @@ Stage-0 <-Reducer 12 [SIMPLE_EDGE] llap SHUFFLE [RS_58] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_167] (rows=44 width=86) + Merge Join Operator [MERGEJOIN_167] (rows=30 width=86) Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] <-Map 11 [SIMPLE_EDGE] llap SHUFFLE [RS_56] @@ -839,11 +839,11 @@ Stage-0 <-Reducer 5 [CONTAINS] llap Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=90 width=177) + Group By Operator [GBY_65] (rows=40 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=73 width=177) + Select Operator [SEL_25] (rows=33 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_166] (rows=73 width=177) + Merge Join Operator [MERGEJOIN_166] (rows=33 width=177) 
Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] <-Map 16 [SIMPLE_EDGE] llap SHUFFLE [RS_23] @@ -852,7 +852,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_22] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_165] (rows=30 width=86) + Merge Join Operator [MERGEJOIN_165] (rows=21 width=86) Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col2"] <-Map 11 [SIMPLE_EDGE] llap SHUFFLE [RS_20] @@ -913,21 +913,21 @@ Stage-0 Select Operator [SEL_15] (rows=605 width=10) Output:["_col0","_col1","_col2"] Map Join Operator [MAPJOIN_26] (rows=605 width=10) - Conds:MAPJOIN_25._col3=RS_13._col0(Inner),Output:["_col0","_col3","_col6"] + Conds:MAPJOIN_25._col3=RS_13._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 3 [BROADCAST_EDGE] llap BROADCAST [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=500 width=10) - Output:["_col0","_col1"] + Output:["_col0"] Filter Operator [FIL_24] (rows=500 width=10) - predicate:key is not null + predicate:value is not null TableScan [TS_6] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] <-Map Join Operator [MAPJOIN_25] (rows=550 width=10) - Conds:SEL_2._col0=RS_10._col1(Inner),Output:["_col0","_col3"] + Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_10] - PartitionCols:_col1 + PartitionCols:_col0 Select Operator [SEL_5] (rows=25 width=7) Output:["_col0","_col1"] Filter Operator [FIL_23] (rows=25 width=7) @@ -935,11 +935,11 @@ Stage-0 TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Select Operator [SEL_2] (rows=500 width=10) - Output:["_col0"] + Output:["_col0","_col1"] Filter Operator [FIL_22] (rows=500 width=10) - predicate:value is not null + predicate:key is not null TableScan [TS_0] (rows=500 width=10) - default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: EXPLAIN select @@ -1002,31 +1002,32 @@ Plan optimized by CBO. 
Vertex dependency in root stage Map 10 <- Map 9 (BROADCAST_EDGE) Map 2 <- Map 1 (BROADCAST_EDGE) -Map 3 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Map 4 <- Map 3 (BROADCAST_EDGE) +Map 5 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 llap + Reducer 7 llap File Output Operator [FS_55] Limit [LIM_54] (rows=100 width=10) Number of rows:100 - Select Operator [SEL_53] (rows=805 width=10) + Select Operator [SEL_53] (rows=732 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] llap + <-Reducer 6 [SIMPLE_EDGE] llap SHUFFLE [RS_52] - Group By Operator [GBY_50] (rows=805 width=10) + Group By Operator [GBY_50] (rows=732 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 3 [SIMPLE_EDGE] llap + <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=1610 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col12, _col20, _col2 - Map Join Operator [MAPJOIN_97] (rows=1610 width=10) - Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),Output:["_col2","_col3","_col12","_col13","_col20","_col21"] + Group By Operator [GBY_48] (rows=1464 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col11)","count(_col21)","count(_col3)"],keys:_col10, _col20, _col2 + Map Join Operator [MAPJOIN_97] (rows=1464 width=10) + Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),Output:["_col2","_col3","_col10","_col11","_col20","_col21"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_44] PartitionCols:_col1, _col3 @@ -1047,10 +1048,10 @@ Stage-0 predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_3] (rows=500 width=10) default@src,d3,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_40] (rows=1464 width=10) - Output:["_col6","_col7","_col14","_col15","_col17"] - Map Join Operator [MAPJOIN_96] (rows=1464 width=10) - Conds:MAPJOIN_94._col6, _col4=RS_38._col4, _col2(Inner),Output:["_col2","_col3","_col14","_col15","_col17"] + <-Select Operator [SEL_40] (rows=1331 width=10) + Output:["_col4","_col5","_col14","_col15","_col17"] + Map Join Operator [MAPJOIN_96] (rows=1331 width=10) + Conds:MAPJOIN_94._col4, _col2=RS_38._col4, _col2(Inner),Output:["_col0","_col1","_col14","_col15","_col17"] <-Map 10 [BROADCAST_EDGE] llap BROADCAST [RS_38] PartitionCols:_col4, _col2 @@ -1071,8 +1072,8 @@ Stage-0 predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_21] (rows=500 width=10) default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_94] (rows=1331 width=10) - Conds:MAPJOIN_93._col3=RS_35._col1(Inner),Output:["_col2","_col3","_col4","_col6"] + <-Map Join Operator [MAPJOIN_94] (rows=1210 width=10) + Conds:MAPJOIN_93._col1=RS_35._col1(Inner),Output:["_col0","_col1","_col2","_col4"] <-Map 8 [BROADCAST_EDGE] llap BROADCAST [RS_35] PartitionCols:_col1 @@ -1082,34 +1083,34 @@ Stage-0 predicate:((key = 'src1key') and value is not null) TableScan 
[TS_15] (rows=25 width=7) default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_93] (rows=1210 width=10) - Conds:MAPJOIN_92._col2=RS_32._col0(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 7 [BROADCAST_EDGE] llap - BROADCAST [RS_32] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_87] (rows=250 width=10) - predicate:((value = 'd1value') and key is not null) - TableScan [TS_12] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_92] (rows=1100 width=10) - Conds:SEL_8._col1=RS_29._col3(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_29] - PartitionCols:_col3 - Select Operator [SEL_11] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_86] (rows=42 width=34) - predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_9] (rows=85 width=34) - default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_8] (rows=1000 width=10) - Output:["_col1"] - Filter Operator [FIL_85] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_93] (rows=1100 width=10) + Conds:RS_31._col3=SEL_14._col1(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_31] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_92] (rows=275 width=10) + Conds:RS_28._col0=SEL_11._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_28] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_85] (rows=42 width=34) + predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) + TableScan [TS_6] (rows=85 width=34) + default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_11] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_86] (rows=250 width=10) + predicate:((value = 'd1value') and key is not null) + TableScan [TS_9] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_14] (rows=1000 width=10) + Output:["_col1"] + Filter Operator [FIL_87] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_12] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain SELECT x.key, z.value, y.value diff --git a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out index 95d13216e7..a4c251e288 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out @@ -26,11 +26,11 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_12] - Select Operator [SEL_11] (rows=2166 width=556) + Select Operator [SEL_11] (rows=2076 width=553) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_10] - Merge Join Operator [MERGEJOIN_17] (rows=2166 width=556) + Merge Join Operator [MERGEJOIN_17] (rows=2076 width=553) Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] @@ -111,7 +111,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=2166 width=8) + Merge Join Operator [MERGEJOIN_19] (rows=2076 width=8) Conds:RS_6._col0=RS_7._col0(Inner) <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] @@ -180,18 +180,18 @@ Stage-0 Stage-1 Reducer 4 llap File Output Operator [FS_16] - Select Operator [SEL_15] (rows=616 width=11) + Select Operator [SEL_15] (rows=623 width=11) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_14] - Group By Operator [GBY_12] (rows=616 width=11) + Group By Operator [GBY_12] (rows=623 width=11) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=616 width=11) + Group By Operator [GBY_10] (rows=623 width=11) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_21] (rows=2166 width=3) + Merge Join Operator [MERGEJOIN_21] (rows=2076 width=3) Conds:RS_6._col1=RS_7._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index d97223c9d0..6bc1970ad0 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -133,7 +133,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -locid double 1.0 4.0 0 5 from deserializer +locid double 1.0 4.0 0 4 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d @@ -142,7 +142,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -locid double 1.0 5.0 0 6 from deserializer +locid double 1.0 5.0 0 5 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d @@ -151,7 +151,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -cnt decimal(10,0) 10 2000 0 5 from deserializer +cnt decimal(10,0) 10 2000 0 4 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d @@ -169,7 +169,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -zip int 43201 94087 0 4 from deserializer +zip int 43201 94087 0 3 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d @@ -178,7 +178,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -zip int 43201 94087 0 4 from deserializer +zip int 43201 94087 0 3 from deserializer PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d @@ -425,7 +425,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -state string 0 4 1.25 4 from deserializer +state string 0 3 1.25 4 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d @@ -443,7 +443,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -locid double 1.0 31.0 0 6 from deserializer +locid double 1.0 31.0 0 5 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d @@ -452,7 +452,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -cnt decimal(10,0) 1000 1010 0 3 from deserializer +cnt decimal(10,0) 1000 1010 0 2 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d @@ -479,7 +479,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -zip int 43201 94087 0 4 from deserializer +zip int 43201 94087 0 3 from deserializer PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d @@ -797,7 +797,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -state string 0 4 3.0 3 from deserializer +state string 0 3 3.0 3 from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d diff --git a/ql/src/test/results/clientpositive/llap/filter_union.q.out b/ql/src/test/results/clientpositive/llap/filter_union.q.out index c4af31715c..0aab9e0a0e 100644 --- a/ql/src/test/results/clientpositive/llap/filter_union.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_union.q.out @@ -52,13 +52,13 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: 
string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -130,13 +130,13 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -202,17 +202,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), 3 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 20295 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 410 Data size: 40590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -238,17 +238,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), 4 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 20295 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 410 Data size: 40590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -326,12 +326,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -343,14 +343,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), 1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 20295 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 20295 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -414,12 +414,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -431,14 +431,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), 4 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 20295 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 20295 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/groupby1.q.out b/ql/src/test/results/clientpositive/llap/groupby1.q.out index 0eecbb6f4e..5917013fa4 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1.q.out @@ -67,14 +67,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data 
size: 3708 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 3708 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/groupby2.q.out b/ql/src/test/results/clientpositive/llap/groupby2.q.out index 29b85d1f44..f94db4edf1 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -68,14 +68,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 41000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 61800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 76260 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 76260 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out b/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out index f2a6ab05ac..1bfde2fad0 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out @@ -38,10 +38,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -93,10 +93,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -164,10 +164,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 
29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -235,10 +235,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -280,12 +280,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -297,10 +297,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -342,12 +342,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -359,10 +359,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE 
Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -405,12 +405,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -422,12 +422,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -437,10 +437,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -483,12 +483,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -500,12 +500,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -515,10 +515,10 @@ STAGE PLANS: keys: KEY._col0 
(type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -656,12 +656,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -673,12 +673,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -688,12 +688,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 0 (type: int), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 4 Execution mode: llap @@ -701,7 +701,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -722,14 +722,14 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 69 Data size: 6831 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data 
size: 8217 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 69 Data size: 6831 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 8217 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/having.q.out b/ql/src/test/results/clientpositive/llap/having.q.out index 267254c0de..75de96ab59 100644 --- a/ql/src/test/results/clientpositive/llap/having.q.out +++ b/ql/src/test/results/clientpositive/llap/having.q.out @@ -28,12 +28,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -45,21 +45,21 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col1 - Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 3) (type: boolean) - Statistics: Num rows: 68 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 68 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -118,12 +118,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -135,10 +135,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 
Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -496,12 +496,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -513,17 +513,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 'val_255') (type: boolean) - Statistics: Num rows: 68 Data size: 18428 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 68 Data size: 5916 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 5916 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -771,12 +771,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 18699 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 18699 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -788,17 +788,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 18699 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 'val_255') (type: boolean) - Statistics: Num rows: 23 Data size: 6233 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 7317 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 23 Data size: 2001 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 23 Data size: 2001 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -973,12 +973,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -990,13 +990,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 'val_255') (type: boolean) - Statistics: Num rows: 68 Data size: 18428 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 18428 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1245,12 +1245,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1262,13 +1262,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 >= 4) (type: boolean) - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE 
Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out index 083bfc301c..b63ad36ac1 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out @@ -56,7 +56,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -177,7 +177,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -296,7 +296,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 19518 Data size: 156144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -413,7 +413,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 19518 Data size: 156144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -527,7 +527,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -638,7 +638,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out index a59188a46a..d39342764f 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out @@ -73,7 +73,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -204,7 +204,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -349,7 +349,7 @@ STAGE PLANS: 0 Map 1 2 Map 4 3 Map 5 - Statistics: Num rows: 5803 Data size: 46424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1694 Data size: 13552 Basic stats: COMPLETE Column stats: COMPLETE 
Group By Operator aggregations: count() mode: hash @@ -502,7 +502,7 @@ STAGE PLANS: 0 Map 1 2 Map 4 3 Map 5 - Statistics: Num rows: 5803 Data size: 46424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1694 Data size: 13552 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -685,7 +685,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 6 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -746,7 +746,7 @@ STAGE PLANS: input vertices: 0 Map 7 2 Map 10 - Statistics: Num rows: 545 Data size: 4360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -935,7 +935,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 6 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -996,7 +996,7 @@ STAGE PLANS: input vertices: 0 Map 7 2 Map 10 - Statistics: Num rows: 545 Data size: 4360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1180,7 +1180,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 46 Data size: 4094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1192,7 +1192,7 @@ STAGE PLANS: input vertices: 1 Map 5 2 Map 6 - Statistics: Num rows: 981 Data size: 7848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 204 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1375,7 +1375,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 46 Data size: 4094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1387,7 +1387,7 @@ STAGE PLANS: input vertices: 1 Map 5 2 Map 6 - Statistics: Num rows: 981 Data size: 7848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 204 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out b/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out index b03b96b463..1f21f0f6f4 100644 --- a/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out +++ b/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out @@ -35,14 +35,14 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((value = 'val_105') and (key = '105')) (type: boolean) - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: '105' (type: string) sort order: + Map-reduce partition columns: '105' (type: string) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 @@ -63,14 +63,14 @@ STAGE PLANS: 1 '105' (type: string) input vertices: 0 Map 1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: '105' (type: string), 'val_105' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/jdbc_handler.q.out b/ql/src/test/results/clientpositive/llap/jdbc_handler.q.out index b4feb0ee1b..54f5a4e8e4 100644 --- a/ql/src/test/results/clientpositive/llap/jdbc_handler.q.out +++ b/ql/src/test/results/clientpositive/llap/jdbc_handler.q.out @@ -149,12 +149,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -167,21 +167,21 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 9064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 11000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: Num rows: 103 Data size: 9064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 11000 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 9064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 11000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 103 Data size: 9064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 11000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -193,10 +193,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - 
Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/join1.q.out b/ql/src/test/results/clientpositive/llap/join1.q.out index d79a405a41..724481beef 100644 --- a/ql/src/test/results/clientpositive/llap/join1.q.out +++ b/ql/src/test/results/clientpositive/llap/join1.q.out @@ -77,14 +77,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index c226eed126..d1c396a71c 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -44,37 +44,37 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + alias: y + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 Estimated key counts: Map 3 => 25 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 3 Position of Big Table: 0 - Statistics: Num rows: 58 Data size: 10266 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 10640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num 
rows: 58 Data size: 10266 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 10640 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 - value expressions: _col0 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -83,12 +83,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 @@ -97,13 +94,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -113,24 +108,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Map 3 Map Operator Tree: TableScan @@ -146,13 +144,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -210,25 +208,24 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + alias: z + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 
Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: _col1 (type: string) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -237,9 +234,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 @@ -248,11 +248,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -262,27 +264,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Reducer 2 Execution mode: llap Needs Tagging: false @@ -293,19 +292,19 @@ STAGE PLANS: keys: 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col6 + outputColumnNames: _col1, _col2, _col4 Position of Big Table: 1 - Statistics: Num rows: 141 Data size: 37788 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17152 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col6 (type: string) + expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 141 Data size: 37788 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17152 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 141 Data size: 37788 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17152 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -539,13 +538,13 @@ STAGE PLANS: input vertices: 1 Map 4 Position of Big Table: 0 - Statistics: Num rows: 44 Data size: 11616 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 44 Data size: 11616 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col0 (type: string), _col3 (type: string) auto parallelism: true @@ -841,13 +840,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3 Position of Big Table: 1 - Statistics: Num rows: 102 Data size: 17850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 63 Data size: 11025 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 102 Data size: 17850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 63 Data size: 11025 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col3 (type: string) auto parallelism: true @@ -863,17 +862,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 Position of Big Table: 0 - Statistics: Num rows: 248 Data size: 65968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 26866 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 248 Data size: 65968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 26866 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 248 Data size: 65968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 26866 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1179,17 +1178,17 @@ STAGE PLANS: input vertices: 1 Map 4 Position of Big Table: 0 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + Map-reduce partition 
columns: _col1 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col0 (type: string) auto parallelism: true @@ -1334,17 +1333,17 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3, _col4 Position of Big Table: 0 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1786,17 +1785,17 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3, _col4 Position of Big Table: 1 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1835,17 +1834,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 Position of Big Table: 1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col0 (type: string) auto parallelism: true @@ -2080,16 +2079,16 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 4 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2123,14 +2122,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col3, _col4 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2336,16 +2335,16 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 4 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2379,14 +2378,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col3, _col4 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git 
a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index 85d45fe712..6520fd3f2b 100644 --- a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -39,10 +39,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -116,10 +116,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,7 +169,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -184,26 +184,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: y @@ -222,6 +211,27 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + File Output Operator + 
compressed: false + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -299,11 +309,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -386,11 +396,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -472,11 +482,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out b/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out index 99d8d6f664..61fba3dd56 100644 --- a/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out @@ -66,7 +66,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -167,7 +167,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -280,7 +280,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -410,12 +410,12 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 4 Execution mode: llap @@ -427,11 +427,11 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE @@ -552,7 +552,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE @@ -589,7 +589,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -731,11 +731,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 3 @@ -744,7 +744,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, 
_col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE @@ -781,15 +781,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 6 @@ -798,7 +798,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE @@ -954,7 +954,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Offset of rows: 1 @@ -1056,7 +1056,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Offset of rows: 1 @@ -1172,7 +1172,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Offset of rows: 1 @@ -1304,12 +1304,12 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 4 Execution mode: llap @@ -1321,11 +1321,11 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data 
size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Offset of rows: 1 @@ -1447,7 +1447,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Offset of rows: 1 @@ -1486,7 +1486,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Offset of rows: 1 @@ -1629,11 +1629,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 3 @@ -1642,7 +1642,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Offset of rows: 1 @@ -1681,15 +1681,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 6 @@ -1698,7 +1698,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 
178 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Offset of rows: 1
diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
index 93315e6b1b..b067e45e2e 100644
--- a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
@@ -204,12 +204,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double)
Execution mode: llap
@@ -222,7 +222,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
@@ -300,12 +300,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 36594 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 36594 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: struct)
Execution mode: llap
@@ -318,7 +318,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
@@ -395,12 +395,12 @@ STAGE PLANS:
keys: cdouble (type: double)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
Map-reduce partition columns: _col0 (type: double)
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
Execution mode: llap
LLAP IO: all inputs
@@ -411,7 +411,7 @@ STAGE PLANS:
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
@@ -509,7 +509,7 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
@@ -607,7 +607,7 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
@@ -702,7 +702,7 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 95 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
@@ -805,12 +805,12 @@ STAGE PLANS:
keys: value (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double)
Execution mode: llap
LLAP IO: no inputs
@@ -822,11 +822,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: double)
sort order: +
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col0 (type: string)
Reducer 3
@@ -835,7 +835,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
@@ -924,19 +924,19 @@ STAGE PLANS:
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: bigint)
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -949,7 +949,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 2
Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
@@ -972,13 +972,13 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 4
- Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -991,7 +991,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 3
Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1070,7 +1070,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
index 351ee01b45..470cffe7cc 100644
--- a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
+++ b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
@@ -205,12 +205,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double)
Execution mode: llap
@@ -223,11 +223,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double)
Reducer 3
@@ -236,7 +236,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
@@ -315,12 +315,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 36594 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 36594 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: struct)
Execution mode: llap
@@ -333,11 +333,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double)
Reducer 3
@@ -346,7 +346,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
@@ -424,12 +424,12 @@ STAGE PLANS:
keys: cdouble (type: double)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
Map-reduce partition columns: _col0 (type: double)
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
Execution mode: llap
LLAP IO: all inputs
@@ -440,11 +440,11 @@ STAGE PLANS:
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
Reducer 3
Execution mode: llap
@@ -452,7 +452,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
@@ -551,11 +551,11 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: bigint)
Reducer 3
@@ -564,7 +564,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
@@ -663,11 +663,11 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: bigint)
Reducer 3
@@ -676,7 +676,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
@@ -772,11 +772,11 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 95 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
- Statistics: Num rows: 95 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reducer 3
@@ -785,7 +785,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 95 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
@@ -888,12 +888,12 @@ STAGE PLANS:
keys: value (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double)
Execution mode: llap
LLAP IO: no inputs
@@ -905,11 +905,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: double)
sort order: +
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col0 (type: string)
Reducer 3
@@ -918,7 +918,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1009,11 +1009,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double)
Reducer 3
@@ -1022,7 +1022,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/llap_smb.q.out b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
index e3044baf52..87b33db805 100644
--- a/ql/src/test/results/clientpositive/llap/llap_smb.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
@@ -321,8 +321,8 @@ POSTHOOK: Input: default@orc_a@y=2001/q=8
POSTHOOK: Input: default@orc_a@y=2001/q=9
POSTHOOK: Input: default@orc_b
#### A masked pattern was here ####
-2001 0 52
-2001 9 139630
+2000 5 52
+2001 5 139630
PREHOOK: query: DROP TABLE orc_a
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@orc_a
diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
index f81ad50679..fda614f7b5 100644
--- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
@@ -114,17 +114,17 @@ STAGE PLANS:
outputColumnNames: ctinyint, csmallint, cint
Statistics: Num rows: 10 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
- aggregations: compute_stats(ctinyint, 16), compute_stats(csmallint, 16)
+ aggregations: compute_stats(ctinyint, 'hll'), compute_stats(csmallint, 'hll')
keys: cint (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 4780 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 4660 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 4780 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: struct), _col2 (type: struct)
+ Statistics: Num rows: 5 Data size: 4660 Basic stats: COMPLETE Column stats: PARTIAL
+ value expressions: _col1 (type: struct), _col2 (type: struct)
Execution mode: llap
LLAP IO: all inputs
Reducer 2
diff --git a/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out b/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out
index 0e9e1207cb..2c62dfba31 100644
--- a/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out
@@ -66,7 +66,7 @@ STAGE PLANS:
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true
input vertices:
1 Map 3
- Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -248,7 +248,7 @@ STAGE PLANS:
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true
input vertices:
1 Map 3
- Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
Group By Vectorization:
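The llap_stats.q.out hunk above is the visible API change: compute_stats now takes 'hll' where it previously took the number of FM bit vectors (16), and the serialized NDV payload in the shuffled struct shrinks with it (4780 to 4660 bytes for five rows). A plausible shape for the argument handling, sketched here with hypothetical names rather than the patch's actual factory, is to keep numeric arguments meaning an FM sketch of that many bit vectors while strings select an estimator by name:

    // Hypothetical dispatch on the compute_stats NDV argument. The method and
    // the HyperLogLog constructor are illustrative, not the patch's real API.
    static NumDistinctValueEstimator forArgument(String arg) {
      if (!arg.isEmpty() && arg.chars().allMatch(Character::isDigit)) {
        // legacy spelling: compute_stats(col, 16) -> FM sketch, 16 bit vectors
        return new FMSketch(Integer.parseInt(arg));
      }
      switch (arg.toLowerCase()) {
        case "hll": return new HyperLogLog(); // assumed constructor
        case "fm":  return new FMSketch(16);  // assumed fallback spelling
        default:
          throw new IllegalArgumentException("unknown NDV estimator: " + arg);
      }
    }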
diff --git a/ql/src/test/results/clientpositive/llap/llapdecider.q.out b/ql/src/test/results/clientpositive/llap/llapdecider.q.out
index 69312cd6a2..9411169802 100644
--- a/ql/src/test/results/clientpositive/llap/llapdecider.q.out
+++ b/ql/src/test/results/clientpositive/llap/llapdecider.q.out
@@ -29,12 +29,12 @@ STAGE PLANS:
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
@@ -43,21 +43,21 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint)
sort order: +
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -268,12 +268,12 @@ STAGE PLANS:
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: all inputs
@@ -285,11 +285,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint)
sort order: +
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string)
Reducer 3
Execution mode: llap
@@ -297,10 +297,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -376,21 +376,21 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -469,11 +469,11 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Execution mode: llap
@@ -481,10 +481,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -561,11 +561,11 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Execution mode: llap
@@ -573,10 +573,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -650,21 +650,21 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -738,21 +738,21 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -830,21 +830,21 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -918,21 +918,21 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1011,11 +1011,11 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Execution mode: llap
@@ -1023,10 +1023,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 288004 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/merge1.q.out b/ql/src/test/results/clientpositive/llap/merge1.q.out
index 8021b67733..649c2f714b 100644
--- a/ql/src/test/results/clientpositive/llap/merge1.q.out
+++ b/ql/src/test/results/clientpositive/llap/merge1.q.out
@@ -42,12 +42,12 @@ STAGE PLANS:
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -59,14 +59,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
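The merge1.q.out hunks show the two group-by estimates side by side: the hash (partial) aggregation over src.key is now estimated at 250 rows, while complete-mode aggregations elsewhere in this patch settle at 309, src's actual distinct-key count. That pattern is consistent with the planner capping the partial estimate at half the input rows (500 / 2 = 250) and trusting the NDV directly once aggregation is final; the snippet below is a rule-of-thumb restatement of that behavior, not Hive's exact StatsRulesProcFactory logic, which also factors in parallelism.

    // Rule-of-thumb group-by cardinality suggested by the plan diffs above.
    // Hypothetical helper, not Hive's actual formula.
    static long groupByRowEstimate(long inputRows, long keyNdv, boolean partialHash) {
      if (partialHash) {
        // a map-side hash aggregation can emit duplicate keys across tasks,
        // so cap the estimate at half the incoming rows
        return Math.min(inputRows / 2, keyNdv);
      }
      // a final aggregation emits exactly one row per distinct key
      return Math.min(inputRows, keyNdv);
    }

With inputRows = 500 and keyNdv = 309 this yields 250 for the hash stage and 309 for the complete stage, matching the numbers above.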
diff --git a/ql/src/test/results/clientpositive/llap/merge2.q.out b/ql/src/test/results/clientpositive/llap/merge2.q.out
index 7bcdd2d57e..dd726894fd 100644
--- a/ql/src/test/results/clientpositive/llap/merge2.q.out
+++ b/ql/src/test/results/clientpositive/llap/merge2.q.out
@@ -59,14 +59,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
index 10fb45d284..9df5e42df2 100644
--- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28)
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -83,10 +83,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 60 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 40 Data size: 14120 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 60 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 40 Data size: 14120 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -95,7 +95,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28)
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1774,7 +1774,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28)
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=38)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1838,7 +1838,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28)
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=38)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2346,7 +2346,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28)
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=38)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2410,7 +2410,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28)
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=38)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/mrr.q.out b/ql/src/test/results/clientpositive/llap/mrr.q.out
index fe477fd815..c42202187f 100644
--- a/ql/src/test/results/clientpositive/llap/mrr.q.out
+++ b/ql/src/test/results/clientpositive/llap/mrr.q.out
@@ -29,12 +29,12 @@ STAGE PLANS:
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -46,11 +46,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint)
sort order: +
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string)
Reducer 3
Execution mode: llap
@@ -58,10 +58,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -457,17 +457,17 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col1 (type: string), _col2 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 609 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 609 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -475,27 +475,27 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 609 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
keys: _col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint), _col0 (type: string)
sort order: ++
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 4
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -882,17 +882,17 @@ STAGE PLANS:
outputColumnNames: _col1, _col2
input vertices:
0 Map 1
- Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col1 (type: string), _col2 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 609 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 609 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 3
@@ -902,27 +902,27 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 609 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
keys: _col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint), _col0 (type: string)
sort order: ++
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 4
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1312,24 +1312,24 @@ STAGE PLANS:
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1341,12 +1341,12 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: llap
@@ -1360,15 +1360,15 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col0, _col1, _col3, _col4, _col5
- Statistics: Num rows: 68 Data size: 13464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 54 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: bigint), _col4 (type: string), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col3, _col4, _col5
- Statistics: Num rows: 68 Data size: 19380 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 54 Data size: 15390 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 68 Data size: 19380 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 54 Data size: 15390 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
Reducer 4
Execution mode: llap
@@ -1376,10 +1376,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 68 Data size: 19380 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 54 Data size: 15390 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 68 Data size: 19380 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 54 Data size: 15390 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1392,15 +1392,15 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col1 > 1) (type: boolean)
- Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 6
Execution mode: llap
@@ -1410,12 +1410,12 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Stage: Stage-0
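In mergejoin.q.out above, only the expectedEntries hint of bloom_filter moves (28 to 32 and 38), tracking the revised distinct-key estimate for the semijoin's build side. The hint matters because a Bloom filter's bit-array size and hash count are derived from the expected entry count and a target false-positive rate; the helpers below show the standard sizing math, which is the usual relationship, though this patch only changes the estimate that feeds it.

    // Standard Bloom filter sizing (textbook formulas, not Hive's code).
    // n = expected entries, p = target false-positive probability.
    static int optimalNumOfBits(long n, double p) {
      // m = -n * ln(p) / (ln 2)^2
      return (int) Math.ceil(-n * Math.log(p) / (Math.log(2) * Math.log(2)));
    }

    static int optimalNumOfHashFunctions(long n, long m) {
      // k = (m / n) * ln 2, with at least one hash function
      return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
    }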
a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out index 25378d3e8b..8e7b456d0c 100644 --- a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out @@ -47,11 +47,11 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -94,11 +94,11 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 6 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -126,10 +126,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -350,21 +350,21 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -379,11 +379,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: 
COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 120 Data size: 10440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80 Data size: 6960 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 @@ -556,11 +556,11 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 6 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 72 Data size: 6252 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -593,17 +593,17 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 72 Data size: 6252 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 72 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 4524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 72 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 4524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -750,7 +750,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -808,7 +808,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -990,7 +990,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -1048,7 +1048,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -1230,7 +1230,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -1454,7 +1454,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 60 Data size: 5220 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -1857,17 +1857,17 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -1896,17 +1896,17 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 7 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 diff --git a/ql/src/test/results/clientpositive/llap/offset_limit.q.out b/ql/src/test/results/clientpositive/llap/offset_limit.q.out index adfeb05448..0e718fde78 100644 --- a/ql/src/test/results/clientpositive/llap/offset_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/offset_limit.q.out @@ -31,12 +31,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double) Execution mode: llap @@ -49,11 +49,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double) Reducer 3 @@ -62,7 +62,7 @@ STAGE PLANS: Select 
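The join-side row estimates above shrink (60 -> 40, 120 -> 80, 1219 -> 809) because the optimizer's inner-join cardinality estimate now consumes the HyperLogLog NDV of the join key instead of the old FM-sketch value. A minimal sketch of the textbook formula these numbers are consistent with; the method name and the assumption that Hive's stats annotation uses exactly this shape are mine, not quoted from the patch:

// Inner-join cardinality: |R1 join R2| ~= |R1| * |R2| / max(ndv1, ndv2).
// Illustrative inputs: 500/25 are the src/src1 row counts these tests use,
// 205 vs. 309 are the FM vs. HLL NDV estimates for the key column, and 16
// is an assumed NDV for src1's key.
public class JoinCardinalitySketch {
  static long estimateJoinRows(long rows1, long ndv1, long rows2, long ndv2) {
    return rows1 * rows2 / Math.max(ndv1, ndv2);   // integer division floors
  }
  public static void main(String[] args) {
    System.out.println(estimateJoinRows(500, 205, 25, 16));   // 60, old plan
    System.out.println(estimateJoinRows(500, 309, 25, 16));   // 40, new plan
    System.out.println(estimateJoinRows(500, 205, 500, 205)); // 1219, old self-join
    System.out.println(estimateJoinRows(500, 309, 500, 309)); // 809, new self-join
  }
}

The same 12500-row cross product divided by 205 gives 60 and divided by 309 gives 40, which is exactly the delta in every multiMapJoin2 hunk above; the 1219 -> 809 pairs fall out of the 500 x 500 self-join the same way.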
Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Offset of rows: 10 diff --git a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 48e58e1bd9..b6bc569f19 100644 --- a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -206,12 +206,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: double) Execution mode: llap @@ -224,7 +224,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -303,12 +303,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 36594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 36594 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) Execution mode: llap @@ -321,7 +321,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -399,12 +399,12 @@ STAGE PLANS: keys: cdouble (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 4265 Data size: 25480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: llap LLAP IO: all inputs @@ -415,7 +415,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 4265 Data size: 25480 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -514,7 +514,7 @@ STAGE PLANS: keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -613,7 +613,7 @@ STAGE PLANS: keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -709,7 +709,7 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 95 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -813,12 +813,12 @@ STAGE PLANS: keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs @@ -830,11 +830,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: double), _col0 (type: string) sort order: ++ - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: llap @@ -842,7 +842,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -933,7 +933,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -1335,7 +1335,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -1379,7 
+1379,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 20 diff --git a/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out b/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out index 95ed8b813a..6c95f3a5df 100644 --- a/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel_colstats.q.out @@ -102,14 +102,14 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -128,14 +128,14 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out index fbaf1e6474..c49fe07ea8 100644 --- a/ql/src/test/results/clientpositive/llap/ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf.q.out @@ -247,12 +247,12 @@ STAGE PLANS: 0 p_partkey (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 3 Execution mode: llap @@ -260,7 +260,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), 
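The parallel_colstats hunks above show the user-visible half of this patch: the second argument of compute_stats is no longer the FM bit-vector count (16) but the name of the NDV estimator ('hll'), and the serialized intermediate struct shrinks accordingly (Data size 984 -> 960 for the single aggregation row). A small sketch of the estimator the new argument selects, using the HyperLogLog class this patch adds under common/src/java/org/apache/hadoop/hive/common/ndv/hll/; the builder-style construction and default settings are an assumption about that API rather than a quote from it:

import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

public class HllNdvDemo {
  public static void main(String[] args) {
    // Assumed API: a builder with library defaults for the register count.
    HyperLogLog hll = HyperLogLog.builder().build();
    for (int i = 0; i < 500; i++) {
      hll.addLong(i % 309);          // 309 distinct keys, as in the src table
    }
    System.out.println(hll.count()); // ~309; the 16-bit-vector FM sketch said 205
  }
}

The accuracy difference is the whole point of the change: the FM sketch underestimated the 309 distinct keys of src as 205, and essentially every statistics delta in these golden files is downstream of that correction.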
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -274,12 +274,12 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 4 Execution mode: llap @@ -287,7 +287,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -308,14 +308,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 6583 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 6583 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1136,10 +1136,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1260,14 +1260,14 @@ STAGE PLANS: 0 p_partkey (type: int) 1 _col0 (type: int) outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col12 
(type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2421,12 +2421,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 Execution mode: llap @@ -2434,7 +2434,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -2481,14 +2481,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), round(sum_window_3, 2) (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 29 Data size: 7511 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 7511 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out b/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out index 6013c11c9e..6106fee9f3 100644 --- a/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out @@ -247,12 +247,12 @@ STAGE PLANS: 0 
p_partkey (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 3 Execution mode: llap @@ -260,7 +260,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -274,12 +274,12 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 4 Execution mode: llap @@ -287,7 +287,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -308,14 +308,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 6583 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 6583 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -466,10 +466,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: 
COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1711,12 +1711,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 Execution mode: llap @@ -1724,7 +1724,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1771,14 +1771,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), sum_window_3 (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 29 Data size: 7511 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 7511 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out index bc44db795a..b4ac5a6062 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out @@ -42,10 +42,10 @@ STAGE PLANS: keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -215,12 +215,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs @@ -232,10 +232,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -313,18 +313,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5580 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 30 Data size: 5580 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double) Reducer 3 Execution mode: llap @@ -334,14 +334,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 5580 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 2850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 2850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -419,21 +419,21 @@ STAGE 
PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -498,10 +498,10 @@ STAGE PLANS: keys: _col0 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -563,10 +563,10 @@ STAGE PLANS: keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2596,10 +2596,10 @@ STAGE PLANS: keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2779,10 +2779,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: 
Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2860,12 +2860,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2874,14 +2874,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 52 Data size: 9672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7440 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 52 Data size: 4940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 52 Data size: 4940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2959,21 +2959,21 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3033,10 +3033,10 @@ STAGE PLANS: keys: _col0 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator 
compressed: false - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3093,10 +3093,10 @@ STAGE PLANS: keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 76c985e727..d2ed3d5673 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -222,7 +222,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -241,9 +241,9 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - Statistics: Num rows: 9756 Data size: 39024 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 25888 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -272,7 +272,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=618) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -353,9 +353,9 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - Statistics: Num rows: 9756 Data size: 39024 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 25888 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -478,16 +478,16 @@ STAGE PLANS: 0 cstring (type: string) 1 value (type: string) outputColumnNames: str - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) sort 
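Two distinct group-by estimates move in the reduce_deduplicate_extended hunks above, and they move differently: final aggregations (mode: complete / mergepartial after the shuffle) go from 205 rows straight to 309, i.e. straight to the key NDV, while map-side hash aggregations go from 205 to 250. That pattern is consistent with the map side being capped at half its input (500 / 2 = 250), with the cap only binding once the NDV estimate rose above it. A sketch of that reading; the min-with-half-input rule is inferred from these numbers, not quoted from Hive's stats code:

public class GroupByEstimateSketch {
  // Final aggregation: one output row per distinct key.
  static long finalRows(long ndv) {
    return ndv;
  }
  // Map-side hash aggregation: assumed cap at half the incoming row count.
  static long hashSideRows(long inputRows, long ndv) {
    return Math.min(ndv, inputRows / 2);
  }
  public static void main(String[] args) {
    System.out.println(hashSideRows(500, 205)); // 205, the old hash-mode estimate
    System.out.println(hashSideRows(500, 309)); // 250, the new hash-mode estimate
    System.out.println(finalRows(309));         // 309, the new complete-mode rows
  }
}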
order: + Map-reduce partition columns: str (type: string) - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) outputColumnNames: _col0 - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) mode: hash @@ -506,9 +506,9 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13126 Data size: 105008 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - Statistics: Num rows: 16004 Data size: 64016 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13126 Data size: 52504 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -655,9 +655,9 @@ STAGE PLANS: 0 cstring (type: string) 1 key1 (type: string) 2 str (type: string) - Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13128 Data size: 105024 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - Statistics: Num rows: 16008 Data size: 64032 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13128 Data size: 52512 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -782,9 +782,9 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - Statistics: Num rows: 9756 Data size: 39024 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 25888 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -894,9 +894,9 @@ STAGE PLANS: keys: 0 value (type: string) 1 key1 (type: string) - Statistics: Num rows: 9345 Data size: 74760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - Statistics: Num rows: 9345 Data size: 37380 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 25888 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1082,7 +1082,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1128,7 +1128,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1224,7 +1224,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, 
expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1243,7 +1243,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1272,7 +1272,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=618) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1353,7 +1353,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1476,16 +1476,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) mode: hash @@ -1504,7 +1504,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13126 Data size: 105008 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1651,7 +1651,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13128 Data size: 105024 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1776,7 +1776,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1886,7 +1886,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9345 Data size: 74760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() 
mode: hash @@ -2056,7 +2056,7 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2102,7 +2102,7 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2190,7 +2190,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=618) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -2209,7 +2209,7 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2238,7 +2238,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=618) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -2311,7 +2311,7 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2435,12 +2435,12 @@ STAGE PLANS: 0 str (type: string) 1 key1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 588952 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 588952 Basic stats: COMPLETE Column stats: PARTIAL Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2450,7 +2450,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 cstring (type: string) - Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13127 Data size: 105016 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2585,7 +2585,7 @@ STAGE PLANS: 0 str (type: string) 1 key1 (type: string) 2 cstring (type: string) - Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 13128 Data size: 105024 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2702,7 +2702,7 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) 
- Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -2804,7 +2804,7 @@ STAGE PLANS: keys: 0 value (type: string) 1 key1 (type: string) - Statistics: Num rows: 9345 Data size: 74760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6472 Data size: 51776 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/llap/skewjoin.q.out b/ql/src/test/results/clientpositive/llap/skewjoin.q.out index dc79b26020..f4fc43edfa 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -141,14 +141,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -654,7 +654,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2)), sum(hash(_col3)) mode: hash @@ -781,7 +781,7 @@ STAGE PLANS: 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double) outputColumnNames: _col2, _col3 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2)), sum(hash(_col3)) mode: hash @@ -937,7 +937,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 314 Data size: 55892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 143 Data size: 25454 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col3)) mode: hash diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 0749872253..4d223923a7 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -265,12 +265,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: 
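In the semijoin_hint plans the runtime bloom filter is resized as well: expectedEntries goes from 410 to 618, tracking the key NDV at exactly twice the estimate (2 x 205 vs. 2 x 309). The factor of two is an observation from these plans, not a documented contract. For scale, the standard bloom-filter sizing formula shows what the larger entry count buys; a sketch with an assumed 5% false-positive target, unrelated to Hive's actual defaults:

public class BloomSizingSketch {
  // Optimal bit count for n expected entries at false-positive rate p:
  //   m = -n * ln(p) / (ln 2)^2
  static long optimalBits(long n, double p) {
    return (long) Math.ceil(-n * Math.log(p) / (Math.log(2) * Math.log(2)));
  }
  public static void main(String[] args) {
    System.out.println(optimalBits(410, 0.05)); // sized from the FM estimate
    System.out.println(optimalBits(618, 0.05)); // sized from the HLL estimate
  }
}

Since expectedEntries feeds the filter's sizing, an NDV underestimate shrinks the filter and raises its false-positive rate once the real key population exceeds the assumed one, so the more accurate HLL count feeds directly into the quality of the semijoin reduction.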
COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -283,10 +283,10 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 1d5f547adf..5c94c6e283 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -56,12 +56,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -74,10 +74,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -777,12 +777,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -796,14 +796,14 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 residual filter predicates: {(_col1 <> _col3)} - Statistics: Num rows: 168 Data size: 44520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 134 Data size: 35510 Basic stats: COMPLETE Column stats: COMPLETE Select 
 expressions: _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -815,12 +815,12 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Stage: Stage-0
 Fetch Operator
@@ -932,16 +932,16 @@ STAGE PLANS:
 Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
- Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: int)
 Execution mode: llap
 LLAP IO: no inputs
@@ -992,12 +992,12 @@ STAGE PLANS:
 keys:
 0 _col0 (type: int)
 1 _col1 (type: int)
 outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col3 (type: int)
 Reducer 4
 Execution mode: llap
@@ -1009,14 +1009,14 @@ STAGE PLANS:
 keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col3
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col3 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1438,10 +1438,10 @@ STAGE PLANS:
 keys:
 0 (_col5 - 1) (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2426,21 +2426,21 @@ STAGE PLANS:
 keys:
 0 (_col1 + 1) (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col2
- Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col2 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col0 (type: string), _col1 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 6
 Execution mode: llap
 Reduce Operator Tree:
@@ -2550,12 +2550,12 @@ STAGE PLANS:
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Map 6
@@ -2592,18 +2592,18 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 404 Data size: 35148 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 202 Data size: 19190 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 202 Data size: 19190 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint)
 Reducer 3
 Execution mode: llap
@@ -2613,15 +2613,15 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 202 Data size: 19190 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 202 Data size: 19190 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: bigint)
 sort order: +
 Map-reduce partition columns: _col1 (type: bigint)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 202 Data size: 19190 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Reducer 4
 Execution mode: llap
@@ -2800,7 +2800,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string), _col1 (type: string)
 1 _col1 (type: string), _col0 (type: string)
 outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 keys: _col0 (type: string)
@@ -3014,11 +3014,11 @@ STAGE PLANS:
 keys:
 0 (_col5 - 1) (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col3 (type: string)
 sort order: +
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
 Reducer 3
 Execution mode: llap
@@ -3026,10 +3026,10 @@ STAGE PLANS:
 Select Operator
 expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3170,11 +3170,11 @@ STAGE PLANS:
 keys:
 0 (_col5 - 1) (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col3 (type: string)
 sort order: +
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 TopN Hash Memory Usage: 0.1
 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
 Reducer 3
@@ -3183,7 +3183,7 @@ STAGE PLANS:
 Select Operator
 expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 Limit
 Number of rows: 4
 Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE
@@ -3348,10 +3348,10 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3596,21 +3596,21 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col3
- Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col3 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col0 (type: string), _col1 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 1750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 1750 Basic stats: COMPLETE Column stats: COMPLETE
 Stage: Stage-0
 Fetch Operator
@@ -3756,21 +3756,21 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col4
- Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col4 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col0 (type: string), _col1 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 1750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 1750 Basic stats: COMPLETE Column stats: COMPLETE
 Stage: Stage-0
 Fetch Operator
@@ -3918,21 +3918,21 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 24 Data size: 5496 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 6412 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 24 Data size: 5496 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 6412 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col0 (type: string), _col1 (type: string), _col2 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 3206 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 3206 Basic stats: COMPLETE Column stats: COMPLETE
 Stage: Stage-0
 Fetch Operator
@@ -4044,21 +4044,21 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col3
- Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col3 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col0 (type: string), _col1 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 1750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 1750 Basic stats: COMPLETE Column stats: COMPLETE
 Stage: Stage-0
 Fetch Operator
@@ -4180,7 +4180,7 @@ STAGE PLANS:
 keys:
 0 _col4 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
- Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16214 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (sq_count_check(_col10, true) > 0) (type: boolean)
 Statistics: Num rows: 8 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
@@ -4509,12 +4509,12 @@ STAGE PLANS:
 keys: _col2 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: int)
 Reducer 5
 Execution mode: llap
@@ -4524,16 +4524,16 @@ STAGE PLANS:
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col1 (type: int), _col0 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int), _col0 (type: int)
 sort order: ++
 Map-reduce partition columns: _col1 (type: int), _col0 (type: int)
- Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 7
 Execution mode: llap
 Reduce Operator Tree:
@@ -4657,7 +4657,7 @@ STAGE PLANS:
 keys:
 0 _col4 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
- Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16214 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (sq_count_check(_col10, true) > 0) (type: boolean)
 Statistics: Num rows: 8 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
@@ -4681,12 +4681,12 @@ STAGE PLANS:
 keys:
 0 _col4 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13
- Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 5032 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col4 (type: string), UDFToLong(_col5) (type: bigint)
 sort order: ++
 Map-reduce partition columns: _col4 (type: string), UDFToLong(_col5) (type: bigint)
- Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 5032 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint)
 Reducer 4
 Execution mode: llap
@@ -4698,10 +4698,10 @@ STAGE PLANS:
 keys:
 0 _col4 (type: string), UDFToLong(_col5) (type: bigint)
 1 _col1 (type: string), _col0 (type: bigint)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16
- Statistics: Num rows: 8 Data size: 5020 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 5036 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col5 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 4 Data size: 2512 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -4914,7 +4914,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
- Statistics: Num rows: 26 Data size: 16214 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16206 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (sq_count_check(_col10, true) > 0) (type: boolean)
 Statistics: Num rows: 8 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE
@@ -5680,10 +5680,10 @@ STAGE PLANS:
 keys:
 0 _col4 (type: string), _col5 (type: int)
 1 _col1 (type: string), _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 5571 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 5571 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -5716,17 +5716,17 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col2, _col4
- Statistics: Num rows: 313 Data size: 33804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 366 Data size: 39528 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col2 (type: string), _col4 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 6
 Execution mode: llap
 Reduce Operator Tree:
@@ -5734,16 +5734,16 @@ STAGE PLANS:
 keys: KEY._col0 (type: string), KEY._col1 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col1 (type: int), _col0 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: string), _col0 (type: int)
 sort order: ++
 Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
- Statistics: Num rows: 156 Data size: 16848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 8
 Execution mode: llap
 Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
index 41b8ea3f25..bbc8a5b0fb 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
@@ -2934,16 +2934,16 @@ STAGE PLANS:
 Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
- Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
 Execution mode: llap
 LLAP IO: no inputs
@@ -3047,12 +3047,12 @@ STAGE PLANS:
 keys:
 0 _col0 (type: int)
 1 _col1 (type: int)
 outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int), _col4 (type: int)
 sort order: ++
 Map-reduce partition columns: _col1 (type: int), _col4 (type: int)
- Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col3 (type: int)
 Reducer 4
 Execution mode: llap
@@ -3064,14 +3064,14 @@ STAGE PLANS:
 keys:
 0 _col1 (type: int), _col4 (type: int)
 1 _col0 (type: int), _col1 (type: int)
 outputColumnNames: _col0, _col3
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col3 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3103,10 +3103,10 @@ STAGE PLANS:
 keys:
 0 _col2 (type: double)
 1 _col0 (type: double)
 outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7
- Statistics: Num rows: 14 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 7 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: int)
 outputColumnNames: _col0, _col1
@@ -3244,12 +3244,12 @@ STAGE PLANS:
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Reducer 2
@@ -3262,7 +3262,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string), _col1 (type: string)
 1 _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 keys: _col0 (type: string), _col1 (type: string)
@@ -3321,18 +3321,18 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 134 Data size: 11658 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 67 Data size: 6365 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 67 Data size: 6365 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint)
 Reducer 6
 Execution mode: llap
@@ -3342,28 +3342,28 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 67 Data size: 6365 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col1 (type: bigint)
 outputColumnNames: _col1
- Statistics: Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 67 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 67 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col1 (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 67 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col0 (type: bigint)
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: bigint)
 sort order: +
 Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
 Stage: Stage-0
 Fetch Operator
@@ -3767,10 +3767,10 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string), _col4 (type: string)
 1 _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4084,12 +4084,12 @@ STAGE PLANS:
 keys: key (type: string)
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Map 9
@@ -4150,10 +4150,10 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col1, _col2, _col4
- Statistics: Num rows: 500 Data size: 50176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 50040 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 500 Data size: 50176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 50040 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string), _col2 (type: bigint), _col4 (type: boolean)
 Reducer 4
 Execution mode: llap
@@ -4165,12 +4165,12 @@ STAGE PLANS:
 keys:
 0
 1
 outputColumnNames: _col1, _col2, _col4, _col5
- Statistics: Num rows: 500 Data size: 52176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 52040 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: ((_col4 is not null and (_col2 <> 0)) or _col1 is not null or _col5 is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 52176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 52040 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- Statistics: Num rows: 500 Data size: 52176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 52040 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
@@ -4214,16 +4214,16 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), true (type: boolean)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7553 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7553 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: boolean)
 Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index c760af2f87..e4aee8efd9 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -69,12 +69,12 @@ STAGE PLANS:
 keys: key (type: string)
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Reducer 2
@@ -104,10 +104,10 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col5
- Statistics: Num rows: 500 Data size: 97676 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 97540 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
- Statistics: Num rows: 500 Data size: 97676 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 97540 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1
@@ -138,16 +138,16 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), true (type: boolean)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7553 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 7553 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: boolean)
 Stage: Stage-0
@@ -1525,12 +1525,12 @@ STAGE PLANS:
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 35 Data size: 6440 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 41 Data size: 7544 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 35 Data size: 6440 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 41 Data size: 7544 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Reducer 2
@@ -1560,10 +1560,10 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 166 Data size: 17438 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 17366 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean)
- Statistics: Num rows: 166 Data size: 17438 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 17366 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: _col0
@@ -1594,16 +1594,16 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 35 Data size: 6440 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 41 Data size: 7544 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), true (type: boolean)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 41 Data size: 7708 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 41 Data size: 7708 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: boolean)
 Stage: Stage-0
@@ -1716,12 +1716,12 @@ STAGE PLANS:
 keys:
 0 _col4 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11
- Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16334 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col4 (type: string), _col5 (type: int)
 sort order: ++
 Map-reduce partition columns: _col4 (type: string), _col5 (type: int)
- Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16334 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint)
 Reducer 3
 Execution mode: llap
@@ -1733,10 +1733,10 @@ STAGE PLANS:
 keys:
 0 _col4 (type: string), _col5 (type: int)
 1 _col1 (type: string), _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14
- Statistics: Num rows: 26 Data size: 16306 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16338 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (not CASE WHEN ((_col10 = 0)) THEN (false) WHEN (_col10 is null) THEN (false) WHEN (_col14 is not null) THEN (true) WHEN (_col5 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 8163 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 8179 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -1919,10 +1919,10 @@ STAGE PLANS:
 keys:
 0 (_col5 - 1) (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
- Statistics: Num rows: 26 Data size: 16546 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16542 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: ((_col9 = 0) or (_col12 is null and _col5 is not null and (_col10 >= _col9))) (type: boolean)
- Statistics: Num rows: 26 Data size: 16546 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16542 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -2467,12 +2467,12 @@ STAGE PLANS:
 keys:
 0 _col0 (type: int)
 1 (_col0 + 100) (type: int)
 outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 7 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: string), _col3 (type: int)
 sort order: ++
 Map-reduce partition columns: _col1 (type: string), _col3 (type: int)
- Statistics: Num rows: 7 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col2 (type: boolean)
 Stage: Stage-0
@@ -3180,12 +3180,12 @@ STAGE PLANS:
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Select Operator
 expressions: (UDFToDouble(p_type) + 2.0) (type: double), p_brand (type: string)
@@ -3213,12 +3213,12 @@ STAGE PLANS:
 keys:
 0 _col2 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col4, _col5
- Statistics: Num rows: 26 Data size: 8658 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 8594 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: UDFToDouble(_col1) (type: double), _col2 (type: string)
 sort order: ++
 Map-reduce partition columns: UDFToDouble(_col1) (type: double), _col2 (type: string)
- Statistics: Num rows: 26 Data size: 8658 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 8594 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string), _col1 (type: string), _col4 (type: bigint), _col5 (type: bigint)
 Reducer 3
 Execution mode: llap
@@ -3230,10 +3230,10 @@ STAGE PLANS:
 keys:
 0 UDFToDouble(_col1) (type: double), _col2 (type: string)
 1 _col0 (type: double), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col4, _col5, _col8
- Statistics: Num rows: 26 Data size: 6274 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 6206 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 3137 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 3105 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: _col0
@@ -3253,12 +3253,12 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reducer 6
 Execution mode: llap
@@ -3421,12 +3421,12 @@ STAGE PLANS:
 keys:
 0 _col2 (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col4, _col5
- Statistics: Num rows: 26 Data size: 6178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 6114 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: string), _col2 (type: int)
 sort order: ++
 Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 26 Data size: 6178 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 6114 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint)
 Reducer 3
 Execution mode: llap
@@ -3438,10 +3438,10 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string), _col2 (type: int)
 1 _col0 (type: string), _col1 (type: int)
 outputColumnNames: _col0, _col1, _col4, _col5, _col8
- Statistics: Num rows: 26 Data size: 6078 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 6014 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 3041 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 3009 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: _col0
@@ -3463,18 +3463,18 @@ STAGE PLANS:
 keys:
 0 (_col1 + 1) (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col2
- Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(), count(_col0)
 keys: _col2 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reducer 6
 Execution mode: llap
@@ -3484,12 +3484,12 @@ STAGE PLANS:
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 9 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reducer 7
 Execution mode: llap
@@ -3501,17 +3501,17 @@ STAGE PLANS:
 keys:
 0 (_col1 + 1) (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col2
- Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col0 (type: string), _col2 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 8
 Execution mode: llap
 Reduce Operator Tree:
@@ -3519,19 +3519,19 @@ STAGE PLANS:
 keys: KEY._col0 (type: string), KEY._col1 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col1 (type: int), true (type: boolean)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 9 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 9 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col2 (type: boolean)
 Stage: Stage-0
@@ -3634,12 +3634,12 @@ STAGE PLANS:
 keys: key (type: string)
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key = '90') (type: boolean)
 Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
@@ -3686,25 +3686,25 @@ STAGE PLANS:
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col5
- Statistics: Num rows: 500 Data size: 98916 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: ((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean)
- Statistics: Num rows: 500 Data size: 98916 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 98916 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint)
 Reducer 4
 Execution mode: llap
@@ -3714,15 +3714,15 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: bigint)
 sort order: +
 Map-reduce partition columns: _col1 (type: bigint)
- Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Reducer 5
 Execution mode: llap
@@ -3761,16 +3761,16 @@ STAGE PLANS:
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), true (type: boolean)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: boolean)
 Reducer 9
 Execution mode: llap
@@ -3898,12 +3898,12 @@ STAGE PLANS:
 keys: _col0 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Filter Operator
 predicate: key is not null (type: boolean)
@@ -3958,12 +3958,12 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
 outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 95480 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 95480 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col3 (type: bigint), _col4 (type: bigint)
 Reducer 3
 Execution mode: llap
@@ -3975,25 +3975,25 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string), _col1 (type: string)
 1 _col1 (type: string), _col0 (type: string)
 outputColumnNames: _col0, _col1, _col3, _col4, _col7
- Statistics: Num rows: 500 Data size: 97012 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 95488 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 250 Data size: 48508 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 47752 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string)
outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 48508 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 47752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 11875 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 11875 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -4003,15 +4003,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 11875 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 11875 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + Map-reduce partition columns: _col1 (type: bigint) - Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 11875 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 5 Execution mode: llap @@ -4039,12 +4039,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 7 Execution mode: llap @@ -4241,10 +4241,10 @@ STAGE PLANS: 0 (_col5 - 1) (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 26 Data size: 16546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16542 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col9 = 0) or (_col12 is null and _col5 is not null and (_col10 >= _col9))) (type: boolean) - Statistics: Num rows: 26 Data size: 16546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16542 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -4469,10 +4469,10 @@ STAGE PLANS: 0 (_col5 - 1) (type: int) 1 _col0 (type: int) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 26 Data size: 16546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16542 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col9 = 0) or (_col12 is null and _col5 is not null and (_col10 >= _col9))) (type: boolean) - Statistics: Num rows: 26 Data size: 16546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16542 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -4761,10 +4761,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col5 - Statistics: Num rows: 500 Data size: 97244 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 97164 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 500 Data size: 97244 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 97164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 @@ -5362,12 +5362,12 @@ STAGE PLANS: keys: p_brand (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Group By Operator keys: p_brand (type: string), p_type (type: string) @@ -5422,12 +5422,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -5439,12 +5439,12 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col3 (type: int) outputColumnNames: _col1, _col3, _col4, _col7 - Statistics: Num rows: 26 Data size: 524 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN 
((_col1 + 100) is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 13 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 13 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -5477,12 +5477,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 7 Execution mode: llap @@ -5512,12 +5512,12 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble((_col0 + 100)) (type: double) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col3 (type: int) - Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -7202,17 +7202,17 @@ STAGE PLANS: keys: value (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -7222,10 +7222,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 2 Execution mode: llap @@ -7237,12 +7237,12 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) 
outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 500 Data size: 91704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 91672 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 91704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 91672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -7254,10 +7254,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 500 Data size: 91932 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 91900 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 250 Data size: 45976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 45960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 @@ -7280,22 +7280,22 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 residual filter predicates: {(_col2 > _col1)} - Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13833 Data size: 3721077 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13833 Data size: 3721077 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col0) keys: _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 11021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 11021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 5 Execution mode: llap @@ -7305,12 +7305,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 11021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 103 Data size: 11021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 6 
Execution mode: llap @@ -7323,21 +7323,21 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 residual filter predicates: {(_col2 > _col1)} - Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13833 Data size: 3721077 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13833 Data size: 3721077 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -7345,19 +7345,19 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3605 Data size: 656110 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3605 Data size: 656110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 9 Execution mode: llap @@ -7366,10 +7366,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 03d5f191b6..1556114955 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -2519,12 +2519,12 @@ STAGE PLANS: 
keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2534,10 +2534,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -2552,14 +2552,14 @@ STAGE PLANS: 2 outputColumnNames: _col0, _col1, _col3 residual filter predicates: {(_col1 > _col3)} - Statistics: Num rows: 68 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3252,18 +3252,18 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 24 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col0) keys: _col3 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 5 Execution mode: llap @@ -3273,16 +3273,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: 
Num rows: 12 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 12 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Stage: Stage-0 @@ -3832,16 +3832,16 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) - Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: llap LLAP IO: no inputs @@ -3894,12 +3894,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int) sort order: + Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) Reducer 4 Execution mode: llap @@ -3912,14 +3912,14 @@ STAGE PLANS: 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col3, _col5, _col6 residual filter predicates: {(_col1 <> CASE WHEN (_col6 is null) THEN (null) ELSE (_col5) END)} - Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4041,16 +4041,16 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((l_linenumber = 1) and l_partkey is not null) 
(type: boolean) - Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: llap LLAP IO: no inputs @@ -4103,12 +4103,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int) sort order: + Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) Reducer 4 Execution mode: llap @@ -4121,14 +4121,14 @@ STAGE PLANS: 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col3, _col5, _col6 residual filter predicates: {(_col1 <> CASE WHEN (_col6 is null) THEN (null) ELSE (_col5) END)} - Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4290,11 +4290,11 @@ STAGE PLANS: 2 _col1 (type: int) outputColumnNames: _col1, _col2, _col4 residual filter predicates: {(_col1 > _col4)} - Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: double) outputColumnNames: _col2 - Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2) mode: hash @@ -4461,10 +4461,10 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 10686 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 11062 Basic stats: COMPLETE Column stats: COMPLETE Filter 
Operator predicate: (not (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)) (type: boolean) - Statistics: Num rows: 13 Data size: 5437 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 5625 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 @@ -5313,12 +5313,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Filter Operator predicate: (key > '9') (type: boolean) @@ -5344,10 +5344,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -5360,14 +5360,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 residual filter predicates: {(_col1 > _col2)} - Statistics: Num rows: 68 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5482,12 +5482,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE @@ -5514,18 +5514,18 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -5535,10 +5535,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -5551,14 +5551,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {(_col2 > _col3)} - Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 4268 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 5022 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 5022 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5852,14 +5852,14 @@ STAGE PLANS: 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 residual filter predicates: {(_col5 > _col11)} - Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1246 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5951,12 +5951,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5989,10 +5989,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -6005,14 +6005,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 residual filter predicates: {(_col1 > _col2)} - Statistics: Num rows: 68 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6076,12 +6076,12 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Filter Operator predicate: (key = '90') (type: boolean) @@ -6110,10 +6110,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col0 (type: string), _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -6126,14 +6126,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 residual filter predicates: {(_col1 > _col2)} - Statistics: Num rows: 68 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 4917c42b0e..eafb77af64 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -263,12 +263,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 26 Data size: 3016 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3048 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 26 Data size: 3016 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3048 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -280,7 +280,7 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col1, _col3, _col4, _col7 - Statistics: Num rows: 26 Data size: 316 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (null) ELSE (false) END (type: boolean) outputColumnNames: _col0, _col1 @@ -671,7 +671,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2928 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col3, true) > 0) (type: boolean) Statistics: Num rows: 8 Data size: 904 Basic stats: COMPLETE Column stats: COMPLETE @@ -695,12 +695,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 8 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ 
Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: bigint), _col6 (type: bigint) Reducer 4 Execution mode: llap @@ -712,7 +712,7 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col1, _col5, _col6, _col9 - Statistics: Num rows: 8 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), CASE WHEN ((_col5 = 0)) THEN (true) WHEN (_col5 is null) THEN (true) WHEN (_col9 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col6 < _col5)) THEN (null) ELSE (true) END (type: boolean) outputColumnNames: _col0, _col1 @@ -1034,7 +1034,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col3 - Statistics: Num rows: 26 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col3 is not null (type: boolean) outputColumnNames: _col0, _col1 @@ -1305,7 +1305,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col3 - Statistics: Num rows: 26 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col3 is null (type: boolean) outputColumnNames: _col0, _col1 @@ -1444,7 +1444,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col2 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), CASE WHEN (_col3 is null) THEN (0) ELSE (_col2) END (type: bigint) outputColumnNames: _col0, _col1 @@ -1582,7 +1582,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col2 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2736 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END (type: string) outputColumnNames: _col0, _col1 @@ -1890,12 +1890,12 @@ STAGE PLANS: keys: _col2 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 72 Data size: 19800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 72 Data size: 19800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1903,12 +1903,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 72 Data size: 19800 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 72 Data size: 19800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -2017,12 +2017,12 @@ STAGE PLANS: keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 58850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 58850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2054,7 +2054,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col2 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 40486 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 @@ -2077,16 +2077,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 58850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 214 Data size: 59706 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 214 Data size: 59706 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Stage: Stage-0 @@ -2654,7 +2654,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col2 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), (1 + CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END) (type: int) outputColumnNames: _col0, _col1 @@ -2789,7 +2789,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col2 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), CASE WHEN (_col3 is null) THEN (false) ELSE (_col2 is null) END (type: boolean) outputColumnNames: _col0, _col1 @@ 
-3191,12 +3191,12 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 26 Data size: 6162 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 6194 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 6162 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 6194 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint) Reducer 3 Execution mode: llap @@ -3208,10 +3208,10 @@ STAGE PLANS: 0 _col1 (type: string), _col2 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col2, _col4, _col5, _col8 - Statistics: Num rows: 26 Data size: 3462 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3494 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 3462 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3494 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col8 (type: boolean) Reducer 4 Execution mode: llap @@ -3223,12 +3223,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col2, _col4, _col5, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3910 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3910 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col8 (type: boolean), _col9 (type: bigint), _col10 (type: bigint) Reducer 5 Execution mode: llap @@ -3240,7 +3240,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2, _col4, _col5, _col8, _col9, _col10, _col12 - Statistics: Num rows: 26 Data size: 3886 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3918 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int), (CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (null) ELSE (false) END and CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col10 < _col9)) THEN (null) ELSE (false) END) (type: boolean) outputColumnNames: _col0, _col1 @@ -3413,7 +3413,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col3 - Statistics: Num rows: 26 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col3 is null (type: boolean) outputColumnNames: _col0, _col1 @@ -4264,14 +4264,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col2 (type: int) outputColumnNames: _col1, _col3, _col4 - 
Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), CASE WHEN (_col4 is null) THEN (0) ELSE (_col3) END (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5524,7 +5524,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col2 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), exp(CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END) (type: double) outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index b64e0f49c6..a2b3fd26b2 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -237,17 +237,17 @@ STAGE PLANS: keys: key (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -260,12 +260,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 - Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: string), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col2 (type: string) - Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Reducer 12 Execution mode: llap @@ -274,12 +274,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reducer 13 Execution mode: llap Reduce Operator Tree: @@ -287,12 +287,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -380,12 +380,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 - Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: string), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col2 (type: string) - Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Reducer 7 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/sysdb.q.out b/ql/src/test/results/clientpositive/llap/sysdb.q.out index fbbf8d9b7f..0e13beb823 100644 --- a/ql/src/test/results/clientpositive/llap/sysdb.q.out +++ b/ql/src/test/results/clientpositive/llap/sysdb.q.out @@ -3277,16 +3277,16 @@ POSTHOOK: query: select table_name, column_name, num_nulls, num_distincts from t POSTHOOK: type: QUERY POSTHOOK: Input: sys@tab_col_stats #### A masked pattern was here #### -alltypesorc cbigint 3115 4083 +alltypesorc cbigint 3115 6064 alltypesorc cboolean1 3114 NULL alltypesorc cboolean2 3115 NULL -alltypesorc cdouble 3114 4264 -alltypesorc cfloat 3115 117 -alltypesorc cint 3115 5774 -alltypesorc csmallint 3114 5529 -alltypesorc cstring1 3114 7488 -alltypesorc cstring2 3115 4083 -alltypesorc ctimestamp1 3115 31 +alltypesorc cdouble 3114 5569 +alltypesorc cfloat 3115 131 +alltypesorc cint 3115 6029 +alltypesorc csmallint 3114 5594 +alltypesorc cstring1 3114 6058 +alltypesorc cstring2 3115 6040 +alltypesorc ctimestamp1 3115 35 PREHOOK: query: select table_name, partition_name, column_name, num_nulls, num_distincts from part_col_stats order by table_name, partition_name, column_name limit 10 PREHOOK: type: QUERY PREHOOK: Input: sys@part_col_stats @@ -3303,8 +3303,8 @@ cbo_t1 dt=2014 value 2 4 cbo_t2 dt=2014 c_boolean 2 NULL cbo_t2 dt=2014 c_float 2 2 cbo_t2 dt=2014 c_int 2 2 -cbo_t2 dt=2014 key 2 4 -cbo_t2 dt=2014 value 2 4 +cbo_t2 dt=2014 key 2 5 +cbo_t2 dt=2014 value 2 5 PREHOOK: query: select schema_version from version order by schema_version limit 5 PREHOOK: type: QUERY PREHOOK: Input: sys@version @@ -3347,9 +3347,8 @@ POSTHOOK: Input: sys@table_params POSTHOOK: Input: sys@table_stats_view #### A masked pattern was here #### {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} 0 0 0 0 
-{"BASIC_STATS":"true","COLUMN_STATS":{"column_name":"true","create_time":"true","grant_option":"true","grantor":"true","grantor_type":"true","principal_name":"true","principal_type":"true","tbl_col_priv":"true","tbl_column_grant_id":"true","tbl_id":"true"}} 0 0 0 0 -{"BASIC_STATS":"true","COLUMN_STATS":{"next_val":"true","sequence_name":"true"}} 0 0 0 0 -{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} 0 0 0 0 +{"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","b":"true","c":"true","d":"true","e":"true","f":"true","g":"true"}} 0 0 0 0 +{"BASIC_STATS":"true","COLUMN_STATS":{"db_id":"true","param_key":"true","param_value":"true"}} 0 0 0 0 #### A masked pattern was here #### PREHOOK: query: select COLUMN_STATS_ACCURATE, NUM_FILES, NUM_ROWS, RAW_DATA_SIZE, TOTAL_SIZE FROM PARTITION_STATS_VIEW where COLUMN_STATS_ACCURATE is not null order by NUM_FILES, NUM_ROWS, RAW_DATA_SIZE limit 5 PREHOOK: type: QUERY @@ -3457,7 +3456,7 @@ POSTHOOK: query: select max(num_distincts) from sys.tab_col_stats POSTHOOK: type: QUERY POSTHOOK: Input: sys@tab_col_stats #### A masked pattern was here #### -7488 +6064 PREHOOK: query: use INFORMATION_SCHEMA PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:information_schema @@ -3604,13 +3603,13 @@ default default alltypesorc ctimestamp1 8 NULL YES timestamp NULL NULL NULL NULL default default alltypesorc ctimestamp2 9 NULL YES timestamp NULL NULL NULL NULL NULL 9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 11 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES timestamp NULL NULL default default alltypesorc cboolean1 10 NULL YES boolean NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 11 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES boolean NULL NULL default default alltypesorc cboolean2 11 NULL YES boolean NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 11 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES boolean NULL NULL -default default moretypes a 0 NULL YES decimal(10,2) NULL NULL 10 10 2 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 27 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES decimal(10,2) 10 10 -default default moretypes b 1 NULL YES tinyint NULL NULL 3 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 27 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES tinyint 3 10 -default default moretypes c 2 NULL YES smallint NULL NULL 5 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 27 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES smallint 5 10 -default default moretypes d 3 NULL YES int NULL NULL 10 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 27 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES int 10 10 -default default moretypes e 4 NULL YES bigint NULL NULL 19 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 27 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES bigint 19 10 -default default moretypes f 5 NULL YES varchar(10) 10 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 27 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES varchar(10) NULL NULL -default default moretypes g 6 NULL YES char(3) 3 3 NULL NULL NULL NULL NULL NULL NULL NULL 
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 27 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES char(3) NULL NULL +default default moretypes a 0 NULL YES decimal(10,2) NULL NULL 10 10 2 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 55 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES decimal(10,2) 10 10 +default default moretypes b 1 NULL YES tinyint NULL NULL 3 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 55 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES tinyint 3 10 +default default moretypes c 2 NULL YES smallint NULL NULL 5 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 55 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES smallint 5 10 +default default moretypes d 3 NULL YES int NULL NULL 10 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 55 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES int 10 10 +default default moretypes e 4 NULL YES bigint NULL NULL 19 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 55 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES bigint 19 10 +default default moretypes f 5 NULL YES varchar(10) 10 10 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 55 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES varchar(10) NULL NULL +default default moretypes g 6 NULL YES char(3) 3 3 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 55 NO NO NULL NULL NULL NULL NULL NULL NEVER NULL NO NO NULL YES char(3) NULL NULL PREHOOK: query: select * from COLUMN_PRIVILEGES order by GRANTOR, GRANTEE, TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME limit 10 PREHOOK: type: QUERY PREHOOK: Input: information_schema@column_privileges diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out index 786929e7af..ece6946fb8 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -32,12 +32,12 @@ STAGE PLANS: keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -49,11 +49,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 3 Execution mode: llap @@ -61,10 +61,10 @@ STAGE PLANS: Select 
Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out index 626a0640da..f7f48885f3 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out @@ -77,11 +77,11 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) Reducer 3 Execution mode: llap @@ -89,10 +89,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -206,7 +206,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 2166 Data size: 17328 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 16608 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -336,18 +336,18 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2166 Data size: 7284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 6924 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: smallint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -357,11 +357,11 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint) Reducer 4 Execution mode: llap @@ -369,10 +369,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out index 273cf06e31..642bda2736 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out @@ -105,11 +105,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE 
Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -117,10 +117,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -296,11 +296,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -308,10 +308,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: 
COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -487,11 +487,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -499,10 +499,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out index 04e0f86fe2..4f557d3cc3 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out @@ -87,7 +87,7 @@ STAGE PLANS: 0 _col2 (type: int) 1 UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE @@ -186,7 +186,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 input vertices: 1 Map 3 - Statistics: Num rows: 2 Data size: 620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out 
b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out index 92a188e18a..e313bad4dc 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out @@ -207,7 +207,7 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 21512 Data size: 3829136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14944 Data size: 2660032 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -240,16 +240,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 - Statistics: Num rows: 19512 Data size: 3473136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12944 Data size: 2304032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 19512 Data size: 3473136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12944 Data size: 2304032 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 21512 Data size: 3829136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14944 Data size: 2660032 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 4 Execution mode: llap @@ -261,18 +261,18 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 50261 Data size: 8946458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24181 Data size: 4304218 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25130 Data size: 4674180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12090 Data size: 2248740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 25130 Data size: 4674180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12090 Data size: 2248740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -282,14 +282,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25130 Data size: 4674180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12090 Data size: 2248740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25130 Data size: 2387350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12090 Data size: 1148550 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25130 Data size: 2387350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12090 Data size: 1148550 Basic stats: COMPLETE Column stats: COMPLETE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out b/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out index 1e1b63d95e..75879fb157 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out @@ -71,12 +71,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 3 Execution mode: llap @@ -88,11 +88,11 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 4 Execution mode: llap @@ -100,10 +100,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out b/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out index 5297d7ecbc..3790b37760 100644 --- a/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out @@ -71,12 +71,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 3 Execution mode: llap @@ 
-88,11 +88,11 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 4 Execution mode: llap @@ -100,10 +100,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 114098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 100570 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out index 66d7aeca70..e5f6940e48 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out @@ -66,10 +66,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 14120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 14120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_union.q.out b/ql/src/test/results/clientpositive/llap/tez_union.q.out index c72b232b35..91184032a7 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union.q.out @@ -42,10 +42,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1719 Data size: 305982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1309 Data size: 233002 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -82,7 +82,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1719 Data size: 305982 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1309 Data size: 233002 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -264,7 +264,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 4878 Data size: 39024 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3236 Data size: 25888 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -400,10 +400,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - Statistics: Num rows: 2439 Data size: 424386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 281532 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2439 Data size: 424386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 281532 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -431,10 +431,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - Statistics: Num rows: 2439 Data size: 424386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 281532 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2439 Data size: 424386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 281532 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -556,7 +556,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -569,15 +569,15 @@ STAGE PLANS: input vertices: 1 Map 9 2 Map 10 - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -650,7 +650,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 6 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -663,15 +663,15 @@ STAGE PLANS: input vertices: 1 Map 9 2 Map 10 - Statistics: Num rows: 21754 Data size: 5677794 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -715,7 +715,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 8 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -728,15 +728,15 @@ STAGE PLANS: input vertices: 1 Map 9 2 Map 10 - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -794,10 +794,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 21754 Data size: 5677794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6354 Data size: 1658394 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -996,10 +996,10 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2438 Data size: 212106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 140766 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1046,10 +1046,10 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - 
Statistics: Num rows: 1219 Data size: 106053 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 809 Data size: 70383 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2438 Data size: 212106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 140766 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1186,10 +1186,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 4 - Statistics: Num rows: 2439 Data size: 868284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 576008 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2439 Data size: 868284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 576008 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1214,10 +1214,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 4 - Statistics: Num rows: 2439 Data size: 868284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 576008 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2439 Data size: 868284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 576008 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1330,10 +1330,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 0 Union 2 - Statistics: Num rows: 2439 Data size: 424386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 281532 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2439 Data size: 424386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1618 Data size: 281532 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_union2.q.out b/ql/src/test/results/clientpositive/llap/tez_union2.q.out index 7b45c7c719..1c8dd111d8 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union2.q.out @@ -61,12 +61,12 @@ STAGE PLANS: keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -118,10 +118,10 @@ STAGE PLANS: keys: KEY._col0 
(type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -142,21 +142,21 @@ STAGE PLANS: keys: _col0 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 35670 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 53766 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 Union 5 diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 5e0d072095..d399c5e4f5 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -186,14 +186,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1059,14 +1059,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1962,14 +1962,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2857,14 +2857,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out index a38e339012..de20075775 100644 --- a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out @@ -77,11 +77,11 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE 
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) Reducer 3 Execution mode: llap @@ -89,10 +89,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2166 Data size: 1204460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 1148660 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -206,7 +206,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 2166 Data size: 17328 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 16608 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -336,18 +336,18 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2166 Data size: 7284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2076 Data size: 6924 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: smallint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -357,11 +357,11 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint) Reducer 4 Execution mode: llap @@ -369,10 +369,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 616 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 7064 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out index f7c694bf84..1ce7a3a37c 100644 --- a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out @@ -105,11 +105,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -117,10 +117,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -296,11 
+296,11 @@ STAGE PLANS:
                      1 UDFToInteger(_col0) (type: int)
                      2 (UDFToInteger(_col0) + 0) (type: int)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                   Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int)
                      sort order: +++
-                     Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
         Reducer 3
             Execution mode: vectorized, llap
@@ -308,10 +308,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-               Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                 Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -487,11 +487,11 @@ STAGE PLANS:
                      1 UDFToInteger(_col0) (type: int)
                      2 (UDFToInteger(_col0) + 0) (type: int)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                   Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int)
                      sort order: +++
-                     Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
         Reducer 3
             Execution mode: vectorized, llap
@@ -499,10 +499,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-               Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                 Statistics: Num rows: 2710 Data size: 601590 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
index 602d57625c..8d97fa4fab 100644
--- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
@@ -1409,10 +1409,10 @@ STAGE PLANS:
                 keys: _col0 (type: string)
                 mode: complete
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 16 Data size: 1536 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                 Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 16 Data size: 1536 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4492,14 +4492,14 @@ STAGE PLANS:
                 keys: _col0 (type: string)
                 mode: complete
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 205 Data size: 19680 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 250 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), UDFToString(_col1) (type: string)
                   outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                   Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -5481,10 +5481,10 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 99 Data size: 53856 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 66 Data size: 35904 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                 Statistics: Num rows: 99 Data size: 53856 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 66 Data size: 35904 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -5630,12 +5630,12 @@ STAGE PLANS:
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                     Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 10
@@ -5651,12 +5651,12 @@ STAGE PLANS:
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                     Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
            LLAP IO: no inputs
         Map 11
@@ -5672,12 +5672,12 @@ STAGE PLANS:
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                     Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 12
@@ -5693,12 +5693,12 @@ STAGE PLANS:
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                     Statistics: Num rows: 108 Data size: 19872 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 160 Data size: 29440 Basic stats: COMPLETE Column stats: PARTIAL
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 108 Data size: 19872 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 160 Data size: 29440 Basic stats: COMPLETE Column stats: PARTIAL
             Execution mode: llap
             LLAP IO: no inputs
         Map 13
@@ -5714,12 +5714,12 @@ STAGE PLANS:
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                     Statistics: Num rows: 59 Data size: 10856 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 85 Data size: 15640 Basic stats: COMPLETE Column stats: PARTIAL
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 59 Data size: 10856 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 85 Data size: 15640 Basic stats: COMPLETE Column stats: PARTIAL
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 3
@@ -5729,17 +5729,17 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
-               Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0
-                 Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 5
             Execution mode: llap
             Reduce Operator Tree:
@@ -5747,17 +5747,17 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
-               Statistics: Num rows: 205 Data size: 37720 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 309 Data size: 56856 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0
-                 Statistics: Num rows: 108 Data size: 19872 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 160 Data size: 29440 Basic stats: COMPLETE Column stats: PARTIAL
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 108 Data size: 19872 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 160 Data size: 29440 Basic stats: COMPLETE Column stats: PARTIAL
         Reducer 7
            Execution mode: llap
            Reduce Operator Tree:
@@ -5765,17 +5765,17 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
-               Statistics: Num rows: 108 Data size: 19872 Basic stats: COMPLETE Column stats: PARTIAL
+               Statistics: Num rows: 160 Data size: 29440 Basic stats: COMPLETE Column stats: PARTIAL
                 Group By Operator
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0
-                 Statistics: Num rows: 59 Data size: 10856 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 85 Data size: 15640 Basic stats: COMPLETE Column stats: PARTIAL
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 59 Data size: 10856 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 85 Data size: 15640 Basic stats: COMPLETE Column stats: PARTIAL
         Reducer 9
             Execution mode: llap
             Reduce Operator Tree:
@@ -5783,16 +5783,16 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
-               Statistics: Num rows: 59 Data size: 10856 Basic stats: COMPLETE Column stats: PARTIAL
+               Statistics: Num rows: 85 Data size: 15640 Basic stats: COMPLETE Column stats: PARTIAL
                 Group By Operator
                   aggregations: count(1)
                   keys: _col0 (type: string)
                   mode: complete
                   outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 59 Data size: 11328 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 85 Data size: 16320 Basic stats: COMPLETE Column stats: PARTIAL
                   File Output Operator
                     compressed: false
-                   Statistics: Num rows: 59 Data size: 11328 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 85 Data size: 16320 Basic stats: COMPLETE Column stats: PARTIAL
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -13633,12 +13633,12 @@ STAGE PLANS:
                       keys: _col0 (type: string), _col1 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                       Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 4
@@ -13655,12 +13655,12 @@ STAGE PLANS:
                       keys: key (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -13671,10 +13671,10 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                 Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -13688,21 +13688,21 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), UDFToString(_col1) (type: string)
                   outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     keys: _col0 (type: string), _col1 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string), _col1 (type: string)
                       sort order: ++
                       Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                     Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
         Union 2
             Vertex: Union 2
@@ -13803,12 +13803,12 @@ STAGE PLANS:
                       keys: key (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -13828,12 +13828,12 @@ STAGE PLANS:
                       keys: _col0 (type: string), _col1 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                       Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -13844,21 +13844,21 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), UDFToString(_col1) (type: string)
                   outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 205 Data size: 55555 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     keys: _col0 (type: string), _col1 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string), _col1 (type: string)
                       sort order: ++
                       Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                     Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 4
             Execution mode: llap
             Reduce Operator Tree:
@@ -13866,10 +13866,10 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                 Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/union_top_level.q.out b/ql/src/test/results/clientpositive/llap/union_top_level.q.out
index 268e0413cd..cfbc069869 100644
--- a/ql/src/test/results/clientpositive/llap/union_top_level.q.out
+++ b/ql/src/test/results/clientpositive/llap/union_top_level.q.out
@@ -274,15 +274,15 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col1, _col2
-               Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col2 (type: string), _col1 (type: string)
                   outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
-                   Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col1 (type: string)
         Reducer 3
@@ -291,7 +291,7 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 10
                   Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
@@ -312,15 +312,15 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col1, _col2
-               Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col2 (type: string), _col1 (type: string)
                   outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
-                   Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col1 (type: string)
         Reducer 6
@@ -329,7 +329,7 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 10
                   Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out
index 8162305df6..2e9d88e343 100644
--- a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out
+++ b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out
@@ -393,15 +393,15 @@ POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 val_238	val_238	true
 PREHOOK: query: select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from varchar_udf_1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 POSTHOOK: query: select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from varchar_udf_1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index 16b716c4e5..e43b4d1bb4 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -75,14 +75,14 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3, _col5
                     input vertices:
                       1 Reducer 5
-                   Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE
                     Filter Operator
                       Filter Vectorization:
                           className: VectorFilterOperator
                           native: true
                           predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 2, val 0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNotNull(col 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 2) -> boolean) -> boolean) -> boolean
                       predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
-                     Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 500 Data size: 98620 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: string), _col1 (type: string)
                         outputColumnNames: _col0, _col1
@@ -159,7 +159,7 @@ STAGE PLANS:
                       keys: key (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                     Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
@@ -168,7 +168,7 @@ STAGE PLANS:
                             className: VectorReduceSinkStringOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
@@ -260,7 +260,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
-               Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), true (type: boolean)
                   outputColumnNames: _col0, _col1
@@ -269,7 +269,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1]
                       selectExpressions: ConstantVectorExpression(val 1) -> 1:long
-                 Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
@@ -278,7 +278,7 @@ STAGE PLANS:
                         className: VectorReduceSinkStringOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: boolean)

  Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
index 6118c73c2b..62d9fc8dbc 100644
--- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     input vertices:
                       1 Map 3
-                   Statistics: Num rows: 26150 Data size: 92144 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 25044 Data size: 87720 Basic stats: COMPLETE Column stats: COMPLETE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -60,7 +60,7 @@ STAGE PLANS:
                         1 _col0 (type: tinyint)
                       input vertices:
                         1 Map 4
-                     Statistics: Num rows: 3418417 Data size: 27347336 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 2423154 Data size: 19385232 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         mode: hash
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index 8df5a64fee..e6d444b249 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -87,7 +87,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
                     predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
-                   Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
                       outputColumnNames: _col0, _col1, _col2
@@ -95,7 +95,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2]
-                     Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col1 (type: int)
                         sort order: +
@@ -104,7 +104,7 @@ STAGE PLANS:
                             className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col2 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -207,7 +207,7 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col3
                     input vertices:
                       1 Map 3
-                   Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
                     Map Join Operator
                       condition map:
                            Left Semi Join 0 to 1
@@ -221,7 +221,7 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col3
                       input vertices:
                         1 Map 4
-                     Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: int), _col3 (type: int)
                         outputColumnNames: _col0, _col1
@@ -229,13 +229,13 @@ STAGE PLANS:
                             className: VectorSelectOperator
                             native: true
                             projectedOutputColumns: [0, 2]
-                       Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                         File Output Operator
                           compressed: false
                           File Sink Vectorization:
                               className: VectorFileSinkOperator
                               native: false
-                         Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                         Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -360,7 +360,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
                     predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
-                   Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -369,7 +369,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [0, 1, 2, 16]
                           selectExpressions: ConstantVectorExpression(val 1) -> 16:long
-                     Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col1 (type: int)
                         sort order: +
@@ -378,7 +378,7 @@ STAGE PLANS:
                             className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -481,7 +481,7 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col3, _col4
                     input vertices:
                       1 Map 3
-                   Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                     Map Join Operator
                       condition map:
                            Left Semi Join 0 to 1
@@ -495,7 +495,7 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col3
                       input vertices:
                         1 Map 4
-                     Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: int), _col3 (type: int)
                         outputColumnNames: _col0, _col1
@@ -503,13 +503,13 @@ STAGE PLANS:
                             className: VectorSelectOperator
                             native: true
                             projectedOutputColumns: [0, 2]
-                       Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                         File Output Operator
                           compressed: false
                          File Sink Vectorization:
                               className: VectorFileSinkOperator
                               native: false
-                         Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                         Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
index d95604d722..0c27d4f41b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
@@ -279,13 +279,13 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
                     input vertices:
                       1 Map 2
-                   Statistics: Num rows: 32 Data size: 17832 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 28 Data size: 15376 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
                       File Sink Vectorization:
                           className: VectorFileSinkOperator
                           native: false
-                     Statistics: Num rows: 32 Data size: 17832 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 28 Data size: 15376 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -450,13 +450,13 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     input vertices:
                       1 Map 2
-                   Statistics: Num rows: 112 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 225 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
                       File Sink Vectorization:
                           className: VectorFileSinkOperator
                           native: false
-                     Statistics: Num rows: 112 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 225 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -712,7 +712,7 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     input vertices:
                       1 Map 3
-                   Statistics: Num rows: 32 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 28 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -730,7 +730,7 @@ STAGE PLANS:
                       outputColumnNames: _col0
                       input vertices:
                         1 Map 4
-                     Statistics: Num rows: 240 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 420 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count(), sum(_col0)
                         Group By Vectorization:
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
index 16de760424..82fa27dc29 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
@@ -294,7 +294,7 @@ STAGE PLANS:
                     outputColumnNames: _col1
                     input vertices:
                       1 Map 3
-                   Statistics: Num rows: 57 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 50 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -312,7 +312,7 @@ STAGE PLANS:
                       outputColumnNames: _col1
                       input vertices:
                         1 Map 4
-                     Statistics: Num rows: 162 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 142 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count(), sum(_col1)
                         Group By Vectorization:
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
index 16edaacf94..9a164fe130 100644
--- a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out
@@ -2659,11 +2659,11 @@ STAGE PLANS:
                       aggregations: compute_stats(_col0, 16), compute_stats(_col2, 16)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order:
-                       Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
-                       value expressions: _col0 (type: struct), _col1 (type: struct)
+                       Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                       value expressions: _col0 (type: struct), _col1 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -2684,10 +2684,10 @@ STAGE PLANS:
                 aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                 Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2700,15 +2700,15 @@ STAGE PLANS:
       ListSink

 PREHOOK: query: select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from varchar_udf_1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
 POSTHOOK: query: select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from varchar_udf_1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index fba9c07350..db76eaaf1b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -30841,17 +30841,17 @@ STAGE PLANS:
                   Filter Operator
                     isSamplingPred: false
                     predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean)
-                   Statistics: Num rows: 30 Data size: 7690 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                     Statistics: Num rows: 30 Data size: 7690 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
                         GlobalTableId: 0
 #### A masked pattern was here ####
                         NumFilesPerFileSink: 1
-                       Statistics: Num rows: 30 Data size: 7690 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30958,19 +30958,19 @@ STAGE PLANS:
                   Filter Operator
                     isSamplingPred: false
                     predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
-                   Statistics: Num rows: 5 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 6 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count()
                       keys: cstring1 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
                         tag: -1
                         value expressions: _col1 (type: bigint)
                         auto parallelism: true
@@ -31036,16 +31036,16 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col1 (type: bigint), _col0 (type: string)
                   outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col1 (type: string)
                     null sort order: a
                     sort order: +
-                   Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
                     tag: -1
                     value expressions: _col0 (type: bigint)
                     auto parallelism: false
@@ -31056,13 +31056,13 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
                 outputColumnNames: _col0, _col1
-               Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_8.q.out b/ql/src/test/results/clientpositive/llap/vectorization_8.q.out
index 0d5b6d53e0..334e2dbb93 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_8.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_8.q.out
@@ -78,7 +78,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 10.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 16.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean)
-                   Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -87,7 +87,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
                          selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double
-                     Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
                         sort order: ++++++++++++++
@@ -97,7 +97,7 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             valueColumns: []
-                       Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -138,7 +138,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
-               Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Limit Vectorization:
@@ -314,7 +314,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean)
-                   Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -323,7 +323,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
                          selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double
-                     Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
                         sort order: ++++++++++++++
@@ -331,7 +331,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -360,7 +360,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
-               Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Limit Vectorization:
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 00577620d8..38598b489a 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -102,7 +102,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean
                     predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean)
-                   Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
                       outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble
@@ -110,7 +110,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 4, 5]
-                     Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
                         Group By Vectorization:
@@ -857,7 +857,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean
                     predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean)
-                   Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint), cfloat (type: float)
                       outputColumnNames: ctinyint, cint, cbigint, cfloat
@@ -865,7 +865,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 2, 3, 4]
-                     Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
                         Group By Vectorization:
@@ -1398,7 +1398,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean
                     predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean)
-                   Statistics: Num rows: 8195 Data size: 1735170 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 8194 Data size: 1734900 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -1407,7 +1407,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28]
                          selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long
-                     Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint)
                         sort order: +++++++++++++++++++++++++
@@ -1415,7 +1415,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -1444,7 +1444,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21]
-               Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 25
                   Limit Vectorization:
@@ -2220,7 +2220,7 @@ STAGE PLANS:
                       keys: csmallint (type: smallint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                     Statistics: Num rows: 1128 Data size: 201900 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 1141 Data size: 204228 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: smallint)
                         sort order: +
@@ -2229,7 +2229,7 @@ STAGE PLANS:
                             className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1128 Data size: 201900 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 1141 Data size: 204228 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -2265,7 +2265,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: smallint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1128 Data size: 39468 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 1141 Data size: 39924 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -2274,7 +2274,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 5, 1, 7, 2, 11, 12, 3, 8, 4, 13]
                      selectExpressions: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 5:long, DecimalScalarDivideDecimalColumn(val -1.389, col 6)(children: CastLongToDecimal(col 0) -> 6:decimal(5,0)) -> 7:decimal(10,9), DoubleColDivideDoubleColumn(col 9, col 10)(children: CastLongToDouble(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 9:double, CastLongToDouble(col 2) -> 10:double) -> 11:double, LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 12:long, LongColUnaryMinus(col 13)(children: LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 13:long) -> 8:long, LongColSubtractLongScalar(col 4, val -89010) -> 13:long
-                 Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint)
                     sort order: +++++++++++
@@ -2282,7 +2282,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
+                   Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
                     TopN Hash Memory Usage: 0.1
         Reducer 3
             Execution mode: vectorized, llap
@@ -2301,7 +2301,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-               Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Limit Vectorization:
@@ -2500,7 +2500,7 @@ STAGE PLANS:
                       keys: cdouble (type: double)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                     Statistics: Num rows: 870 Data size: 234888 Basic stats: COMPLETE Column stats: COMPLETE
+                     Statistics: Num rows: 1136 Data size: 306696 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: double)
                         sort order: +
@@ -2509,7 +2509,7 @@ STAGE PLANS:
                             className: VectorReduceSinkMultiKeyOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 870 Data size: 234888 Basic stats: COMPLETE Column stats: COMPLETE
+                       Statistics: Num rows: 1136 Data size: 306696 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -2545,7 +2545,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-               Statistics: Num rows: 870 Data size: 46968 Basic stats: COMPLETE Column stats: COMPLETE
+               Statistics: Num rows: 1136 Data size: 61320 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2554,7 +2554,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1, 7, 8, 2, 10, 11, 3, 4, 12, 5, 9, 13, 6, 15]
                      selectExpressions: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 7:double, DoubleColUnaryMinus(col 1) -> 8:double, DoubleColAddDoubleScalar(col 9, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 9:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 9, col 12)(children: DoubleColUnaryMinus(col 1) -> 9:double, DoubleColAddDoubleScalar(col 11, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 11:double) -> 12:double) -> 11:double, DoubleColSubtractDoubleColumn(col 0, col 9)(children: DoubleColUnaryMinus(col 1) -> 9:double) -> 12:double, DoubleColAddDoubleColumn(col 0, col 1) -> 9:double, DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 13:double, DoubleScalarModuloDoubleColumn(val -863.257, col 14)(children: DoubleColMultiplyDoubleScalar(col 0, val 762.0) ->
14:double) -> 15:double - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -2562,7 +2562,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -2581,13 +2581,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index c3e5f7c90d..fe9f0d22f6 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -211,7 +211,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -222,7 +222,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [] - Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -266,7 +266,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: 
sum(_col0), count(_col0), avg(_col0), std(_col0) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 9a1c44c3e6..0af7204256 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -1741,7 +1741,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=94) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE @@ -1789,7 +1789,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=94) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index a03466f859..ee65bed15b 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -152,7 +152,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=36) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -200,7 +200,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=36) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -288,7 +288,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE @@ -336,7 +336,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE 
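A note on the expectedEntries changes in the bloom_filter aggregations in these hunks: the semijoin-reduction Bloom filter is sized from the optimizer's distinct-key (NDV) estimate, and with column statistics now produced by the HLL estimator rather than the FM sketch those estimates shift, which is why values such as 94, 36, 32, 60, and 30 move to 114 and 40 in the updated plans. As a rough, self-contained illustration of how an expectedEntries hint translates into a filter, the sketch below applies the textbook Bloom filter sizing formulas m = -n*ln(p)/(ln 2)^2 and k = (m/n)*ln 2. The class name and the target false-positive rate are assumptions for illustration only; this is not Hive's actual sizing code, which is not part of this diff.

// Illustrative sketch only, not Hive's implementation: standard Bloom
// filter sizing from an expected distinct-key count, as a way to read
// the expectedEntries values in the plans above.
public final class BloomFilterSizing {
  private BloomFilterSizing() {}

  // m = -n * ln(p) / (ln 2)^2 : bits needed for n entries at false-positive rate p
  public static long optimalNumBits(long expectedEntries, double fpp) {
    return (long) Math.ceil(-expectedEntries * Math.log(fpp)
        / (Math.log(2) * Math.log(2)));
  }

  // k = (m / n) * ln 2 : number of hash functions minimizing the false-positive rate
  public static int optimalNumHashFunctions(long expectedEntries, long numBits) {
    return Math.max(1, (int) Math.round((double) numBits / expectedEntries * Math.log(2)));
  }

  public static void main(String[] args) {
    long n = 40;       // expectedEntries, as in several plans above
    double p = 0.05;   // hypothetical target false-positive rate
    long m = optimalNumBits(n, p);
    int k = optimalNumHashFunctions(n, m);
    System.out.printf("n=%d p=%.2f -> m=%d bits, k=%d hashes%n", n, p, m, k);
  }
}

For expectedEntries = 40 at a hypothetical 5% false-positive rate this comes to roughly 250 bits and 4 hash functions; the point is only that a more accurate NDV estimate feeds directly into a better-sized runtime filter, which is what these golden-file updates reflect.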
@@ -424,7 +424,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=60) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -472,7 +472,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=60) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -832,7 +832,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -851,7 +851,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -880,7 +880,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -968,7 +968,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE @@ -987,7 +987,7 @@ STAGE PLANS: keys: 0 _col0 (type: char(10)) 1 _col0 (type: char(10)) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1016,7 +1016,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE @@ -1104,7 +1104,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30) + 
aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE @@ -1123,7 +1123,7 @@ STAGE PLANS: keys: 0 _col0 (type: varchar(10)) 1 _col0 (type: varchar(10)) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1152,7 +1152,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 8160bc7c44..9590c005c3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -59,7 +59,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -68,7 +68,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 2, 12] selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long - Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out index f744eb6513..2c6685691a 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out @@ -57,7 +57,7 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -75,7 +75,7 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 5 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -94,16 +94,16 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((value = 'val_278') and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 
- Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -113,16 +113,16 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((value = 'val_255') and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index 28a8340a9c..2a95065203 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -40,11 +40,11 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 884742 Data size: 10596096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: smallint), _col3 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 884742 Data size: 10596096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -54,7 +54,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 1966236 Data size: 15716016 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1452263 Data size: 11604232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) mode: hash diff --git a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 73ab9fca82..5469018a98 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -118,11 +118,11 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/windowing_gby.q.out b/ql/src/test/results/clientpositive/llap/windowing_gby.q.out index 2c47b8b2a6..22d2d75e70 100644 --- a/ql/src/test/results/clientpositive/llap/windowing_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/windowing_gby.q.out @@ -41,7 +41,7 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_10] (rows=3 width=20) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)","sum(_col1)"],keys:_col2 - Merge Join Operator [MERGEJOIN_24] (rows=29 width=7) + Merge Join Operator [MERGEJOIN_24] (rows=36 width=7) Conds:RS_6._col0=RS_7._col1(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] diff --git a/ql/src/test/results/clientpositive/parallel_colstats.q.out b/ql/src/test/results/clientpositive/parallel_colstats.q.out index c85113137b..d5bce1ec77 100644 --- a/ql/src/test/results/clientpositive/parallel_colstats.q.out +++ b/ql/src/test/results/clientpositive/parallel_colstats.q.out @@ -100,10 +100,10 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -128,10 +128,10 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -172,17 +172,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -207,17 +207,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort 
order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out index 5876efacf3..87d47dae22 100644 --- a/ql/src/test/results/clientpositive/partial_column_stats.q.out +++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out @@ -26,23 +26,23 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(key, 16), compute_stats(value, 16) + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out index 3505556029..d459b36ff0 100644 --- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -304,7 +304,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -key int 27 484 0 18 from deserializer +key int 27 484 0 20 from deserializer PREHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value PREHOOK: type: DESCTABLE PREHOOK: Input: 
default@partcoltypenum @@ -313,7 +313,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 18 6.766666666666667 7 from deserializer +value string 0 20 6.766666666666667 7 from deserializer PREHOOK: query: describe formatted partcoltypenum tint PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum diff --git a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out index 3ad09e815c..4bddd3bef8 100644 --- a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out +++ b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +PREHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -10,7 +10,7 @@ select PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +POSTHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -23,7 +23,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### {"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{0}{0}{0}{1}{1}{1}{0}{0}{0}{0}{0}{1}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{0}{0}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}{4}{2}{0}"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{1}{3}{2}{3}{5}{2}{0}{1}{0}{1}{1}{1}{1}{0}{1}"} -PREHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -33,7 +33,7 @@ select var_samp(substr(src.value,5)) as d from src)subq PREHOOK: type: QUERY -POSTHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +POSTHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -69,7 +69,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(_col0, 16), compute_stats(_col1, 16), compute_stats(_col2, 16), compute_stats(_col3, 16) + aggregations: compute_stats(_col0, 'fm', 16), compute_stats(_col1, 'fm', 16), compute_stats(_col2, 'fm', 16), compute_stats(_col3, 'fm', 16) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE @@ -87,7 +87,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select 
compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +PREHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -99,7 +99,7 @@ select PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +POSTHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -112,7 +112,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### {"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"} -PREHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -122,7 +122,7 @@ select var_samp(substr(src.value,5)) as d from src)subq PREHOOK: type: QUERY -POSTHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +POSTHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -175,7 +175,7 @@ STAGE PLANS: value expressions: _col0 (type: double), _col1 (type: string), _col2 (type: double), _col3 (type: double) Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16) + aggregations: compute_stats(VALUE._col0, 'fm', 16), compute_stats(VALUE._col3, 'fm', 16), compute_stats(VALUE._col4, 'fm', 16), compute_stats(VALUE._col5, 'fm', 16) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE @@ -193,7 +193,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +PREHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select @@ -205,7 +205,7 @@ select PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16) +POSTHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( select diff --git a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out index 33cf90ae9d..4600e71f09 100644 --- a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out +++ 
b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out @@ -449,14 +449,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid) IN (5) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -485,14 +485,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid) IN (5, 2, 3) (type: boolean) - Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out index ec4076f908..19546c38bc 100644 --- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out +++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out @@ -173,7 +173,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 5 5.0 5 from deserializer +value string 0 6 5.0 5 from deserializer PREHOOK: query: ALTER TABLE ex_table PARTITION (part='part1') RENAME TO PARTITION (part='part2') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: default@ex_table @@ -321,4 +321,4 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -value string 0 5 5.0 5 from deserializer +value string 0 6 5.0 5 from deserializer diff --git a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out index b3d6f039ac..16b3a38c46 100644 --- a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out +++ 
b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out @@ -59,7 +59,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE @@ -69,7 +69,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE @@ -98,7 +98,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE @@ -108,7 +108,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col3 PREHOOK: type: DESCTABLE @@ -205,7 +205,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE @@ -215,7 +215,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE @@ -244,7 +244,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -col1 int 27 484 0 8 from deserializer +col1 int 27 484 0 10 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE @@ -254,7 +254,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 # col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment -col2 string 0 12 6.7 7 from deserializer +col2 string 0 10 6.7 7 from deserializer COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col3 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index bd024a7ab1..ad92058cab 100644 --- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -80,23 +80,23 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -132,16 +132,16 @@ STAGE PLANS: outputColumnNames: sourceip, adrevenue, avgtimeonsite Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -196,13 +196,13 @@ STAGE 
PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -244,7 +244,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -sourceIP string 0 69 12.763636363636364 13 from deserializer +sourceIP string 0 55 12.763636363636364 13 from deserializer PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -253,7 +253,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -avgTimeOnSite int 1 9 0 11 from deserializer +avgTimeOnSite int 1 9 0 9 from deserializer PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -262,7 +262,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -adRevenue float 13.099044799804688 492.98870849609375 0 58 from deserializer +adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer PREHOOK: query: CREATE TEMPORARY TABLE empty_tab( a int, b double, @@ -315,23 +315,23 @@ STAGE PLANS: outputColumnNames: a, b, c, d, e Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16), compute_stats(e, 16) + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2000 
Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -467,7 +467,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -sourceIP string 0 69 12.763636363636364 13 from deserializer +sourceIP string 0 55 12.763636363636364 13 from deserializer PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none @@ -491,7 +491,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -sKeyword string 0 49 7.872727272727273 19 from deserializer +sKeyword string 0 54 7.872727272727273 19 from deserializer PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -500,4 +500,4 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -sKeyword string 0 49 7.872727272727273 19 from deserializer +sKeyword string 0 54 7.872727272727273 19 from deserializer diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out index 6602222ed7..59437b80b4 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out @@ -240,7 +240,7 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=205/309 width=95) + Group By Operator [GBY_3] (rows=309/309 width=95) Output:["_col0","_col1"],aggregations:["count(KEY._col0)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_2] @@ -287,7 +287,7 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_10] - Merge Join Operator [MERGEJOIN_18] (rows=382/0 width=8) + Merge Join Operator [MERGEJOIN_18] (rows=267/0 width=8) Conds:RS_6._col0=RS_7._col0(Inner) <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] @@ -336,7 +336,7 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_10] - Merge Join Operator [MERGEJOIN_18] (rows=399/1019 width=8) + Merge Join Operator [MERGEJOIN_18] (rows=267/1019 width=8) Conds:RS_6._col0=RS_7._col0(Inner) <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] @@ -440,9 +440,9 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_10] - Select Operator [SEL_9] (rows=1219/1028 width=178) + Select Operator [SEL_9] (rows=809/1028 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_15] (rows=1219/1028 width=178) + Merge Join Operator [MERGEJOIN_15] (rows=809/1028 width=178) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index 0916565f0f..c86717790d 100644 --- 
a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -59,15 +59,15 @@ Stage-0 Stage-1 Reducer 7 File Output Operator [FS_56] - Group By Operator [GBY_54] (rows=28/15 width=177) + Group By Operator [GBY_54] (rows=32/15 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 6 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] Reduce Output Operator [RS_53] PartitionCols:_col0, _col1 - Select Operator [SEL_49] (rows=148/61 width=177) + Select Operator [SEL_49] (rows=67/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_82] (rows=148/61 width=177) + Merge Join Operator [MERGEJOIN_82] (rows=67/61 width=177) Conds:RS_46._col2=RS_47._col0(Inner),Output:["_col1","_col2"] <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_47] @@ -81,7 +81,7 @@ Stage-0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_81] (rows=61/52 width=177) + Merge Join Operator [MERGEJOIN_81] (rows=42/52 width=177) Conds:RS_43._col1=RS_44._col1(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_44] @@ -121,9 +121,9 @@ Stage-0 <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_53] PartitionCols:_col0, _col1 - Select Operator [SEL_24] (rows=148/61 width=177) + Select Operator [SEL_24] (rows=67/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_80] (rows=148/61 width=177) + Merge Join Operator [MERGEJOIN_80] (rows=67/61 width=177) Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col1","_col2"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_22] @@ -137,7 +137,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_79] (rows=61/52 width=177) + Merge Join Operator [MERGEJOIN_79] (rows=42/52 width=177) Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_19] @@ -259,15 +259,15 @@ Stage-0 Stage-1 Reducer 9 File Output Operator [FS_114] - Group By Operator [GBY_112] (rows=872/15 width=177) + Group By Operator [GBY_112] (rows=398/15 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 8 [SIMPLE_EDGE] <-Reducer 15 [CONTAINS] Reduce Output Operator [RS_111] PartitionCols:_col0, _col1 - Select Operator [SEL_107] (rows=434/61 width=177) + Select Operator [SEL_107] (rows=199/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_162] (rows=434/61 width=177) + Merge Join Operator [MERGEJOIN_162] (rows=199/61 width=177) Conds:RS_104._col2=RS_105._col0(Inner),Output:["_col2","_col5"] <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_105] @@ -281,7 +281,7 @@ Stage-0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_104] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_161] (rows=178/52 width=86) + Merge Join Operator [MERGEJOIN_161] (rows=123/52 width=86) Conds:RS_101._col1=RS_102._col1(Inner),Output:["_col2"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_102] @@ -355,15 +355,15 @@ Stage-0 <-Reducer 7 [CONTAINS] Reduce Output Operator [RS_111] PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=438/15 width=177) + Group By Operator [GBY_63] (rows=199/15 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 6 [SIMPLE_EDGE] <-Reducer 13 [CONTAINS] Reduce Output Operator [RS_62] PartitionCols:_col0, _col1 - Select Operator [SEL_58] (rows=290/61 width=177) + Select Operator [SEL_58] (rows=132/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_160] (rows=290/61 width=177) + Merge Join Operator [MERGEJOIN_160] (rows=132/61 width=177) 
Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] <-Map 24 [SIMPLE_EDGE] SHUFFLE [RS_56] @@ -377,7 +377,7 @@ Stage-0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_159] (rows=119/52 width=86) + Merge Join Operator [MERGEJOIN_159] (rows=82/52 width=86) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_53] @@ -429,9 +429,9 @@ Stage-0 <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_62] PartitionCols:_col0, _col1 - Select Operator [SEL_24] (rows=148/61 width=177) + Select Operator [SEL_24] (rows=67/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_158] (rows=148/61 width=177) + Merge Join Operator [MERGEJOIN_158] (rows=67/61 width=177) Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_22] @@ -440,7 +440,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_157] (rows=61/52 width=86) + Merge Join Operator [MERGEJOIN_157] (rows=42/52 width=86) Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_19] @@ -1134,9 +1134,9 @@ Stage-5 <-Reducer 12 [CONTAINS] File Output Operator [FS_75] table:{"name:":"default.a"} - Select Operator [SEL_44] (rows=5839/5421 width=178) + Select Operator [SEL_44] (rows=2682/5421 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_122] (rows=5839/5421 width=178) + Merge Join Operator [MERGEJOIN_122] (rows=2682/5421 width=178) Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_42] @@ -1150,7 +1150,7 @@ Stage-5 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=2394/2097 width=87) + Merge Join Operator [MERGEJOIN_121] (rows=1658/2097 width=87) Conds:Union 14._col0=RS_39._col1(Inner),Output:["_col1"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_39] @@ -1198,9 +1198,9 @@ Stage-5 <-Reducer 4 [CONTAINS] File Output Operator [FS_75] table:{"name:":"default.a"} - Select Operator [SEL_20] (rows=148/170 width=177) + Select Operator [SEL_20] (rows=67/170 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_120] (rows=148/170 width=177) + Merge Join Operator [MERGEJOIN_120] (rows=67/170 width=177) Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col1","_col4"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_18] @@ -1213,7 +1213,7 @@ Stage-5 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_119] (rows=61/108 width=86) + Merge Join Operator [MERGEJOIN_119] (rows=42/108 width=86) Conds:Union 2._col0=RS_15._col1(Inner),Output:["_col1"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_15] @@ -1252,14 +1252,14 @@ Stage-5 <-Reducer 9 [CONTAINS] File Output Operator [FS_75] table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=313/820 width=175) + Select Operator [SEL_72] (rows=192/820 width=175) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_124] (rows=313/820 width=175) + Merge Join Operator [MERGEJOIN_124] (rows=192/820 width=175) Conds:RS_69._col1=Union 20._col0(Inner),Output:["_col0","_col3"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_123] (rows=44/115 width=264) + Merge Join Operator [MERGEJOIN_123] (rows=39/115 width=264) Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_66] @@ -1440,20 +1440,20 @@ Stage-5 Reducer 9 File Output Operator [FS_115] 
table:{"name:":"default.a"} - Group By Operator [GBY_112] (rows=6300/319 width=178) + Group By Operator [GBY_112] (rows=2941/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 8 [SIMPLE_EDGE] <-Reducer 13 [CONTAINS] Reduce Output Operator [RS_111] PartitionCols:_col0, _col1 - Select Operator [SEL_107] (rows=313/304 width=175) + Select Operator [SEL_107] (rows=192/304 width=175) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_164] (rows=313/304 width=175) + Merge Join Operator [MERGEJOIN_164] (rows=192/304 width=175) Conds:RS_104._col1=RS_105._col1(Inner),Output:["_col0","_col3"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_104] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_163] (rows=44/115 width=264) + Merge Join Operator [MERGEJOIN_163] (rows=39/115 width=264) Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_101] @@ -1536,15 +1536,15 @@ Stage-5 <-Reducer 7 [CONTAINS] Reduce Output Operator [RS_111] PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=5987/309 width=178) + Group By Operator [GBY_63] (rows=2749/309 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 6 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] Reduce Output Operator [RS_62] PartitionCols:_col0, _col1 - Select Operator [SEL_58] (rows=5839/1056 width=178) + Select Operator [SEL_58] (rows=2682/1056 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_162] (rows=5839/1056 width=178) + Merge Join Operator [MERGEJOIN_162] (rows=2682/1056 width=178) Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] <-Map 24 [SIMPLE_EDGE] SHUFFLE [RS_56] @@ -1558,7 +1558,7 @@ Stage-5 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_161] (rows=2394/512 width=87) + Merge Join Operator [MERGEJOIN_161] (rows=1658/512 width=87) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_53] @@ -1615,9 +1615,9 @@ Stage-5 <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_62] PartitionCols:_col0, _col1 - Select Operator [SEL_24] (rows=148/61 width=177) + Select Operator [SEL_24] (rows=67/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_160] (rows=148/61 width=177) + Merge Join Operator [MERGEJOIN_160] (rows=67/61 width=177) Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_22] @@ -1630,7 +1630,7 @@ Stage-5 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_159] (rows=61/52 width=86) + Merge Join Operator [MERGEJOIN_159] (rows=42/52 width=86) Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_19] @@ -1748,9 +1748,9 @@ Stage-4 Reducer 5 File Output Operator [FS_18] table:{"name:":"default.dest1"} - Select Operator [SEL_16] (rows=205/310 width=272) + Select Operator [SEL_16] (rows=309/310 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=205/310 width=96) + Group By Operator [GBY_15] (rows=309/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_14] @@ -1826,9 +1826,9 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_8] - Select Operator [SEL_7] (rows=594/4122 width=260) + Select Operator [SEL_7] (rows=261/4122 width=260) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_9] (rows=594/4122 width=260) + Merge Join Operator [MERGEJOIN_9] (rows=261/4122 width=260) 
Conds:RS_3.key=RS_3.key(Unique),RS_3.key=RS_3.key(Unique),RS_3.key=RS_3.key(Unique),Output:["_col0","_col5","_col10"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_3] @@ -1885,9 +1885,9 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_9] - Transform Operator [SCR_8] (rows=1219/1028 width=178) + Transform Operator [SCR_8] (rows=809/1028 width=178) command:cat - Merge Join Operator [MERGEJOIN_14] (rows=1219/1028 width=178) + Merge Join Operator [MERGEJOIN_14] (rows=809/1028 width=178) Conds:RS_3.key=RS_5.key(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_3] @@ -1980,9 +1980,9 @@ Stage-4 Reducer 4 File Output Operator [FS_16] table:{"name:":"default.dest1"} - Select Operator [SEL_14] (rows=205/310 width=272) + Select Operator [SEL_14] (rows=309/310 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=205/310 width=96) + Group By Operator [GBY_13] (rows=309/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] @@ -2098,9 +2098,9 @@ Stage-4 Reducer 4 File Output Operator [FS_14] table:{"name:":"default.dest1"} - Select Operator [SEL_12] (rows=205/310 width=272) + Select Operator [SEL_12] (rows=309/310 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_11] (rows=205/310 width=96) + Group By Operator [GBY_11] (rows=309/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index e5c8d6c51e..9d47066297 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -256,7 +256,7 @@ Stage-3 Reducer 2 File Output Operator [FS_5] Group By Operator [GBY_3] (rows=1/1 width=960) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Map 1 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_2] Select Operator [SEL_1] (rows=500/500 width=10) diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out index 9fbe8c5263..72d0682c13 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out @@ -44,11 +44,11 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_12] - Select Operator [SEL_11] (rows=2166/10 width=556) + Select Operator [SEL_11] (rows=2076/10 width=553) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] - Merge Join Operator [MERGEJOIN_17] (rows=2166/10 width=556) + Merge Join Operator [MERGEJOIN_17] (rows=2076/10 width=553) Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] @@ -143,7 +143,7 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE 
[RS_10] - Merge Join Operator [MERGEJOIN_18] (rows=2166/10 width=8) + Merge Join Operator [MERGEJOIN_18] (rows=2076/10 width=8) Conds:RS_6._col0=RS_7._col0(Inner) <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] @@ -232,16 +232,16 @@ Stage-0 Stage-1 Reducer 4 File Output Operator [FS_15] - Select Operator [SEL_14] (rows=616/5 width=11) + Select Operator [SEL_14] (rows=623/5 width=11) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_13] - Group By Operator [GBY_11] (rows=616/5 width=11) + Group By Operator [GBY_11] (rows=623/5 width=11) Output:["_col0","_col1"],aggregations:["count()"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_20] (rows=2166/10 width=3) + Merge Join Operator [MERGEJOIN_20] (rows=2076/10 width=3) Conds:RS_6._col1=RS_7._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index b35e294813..626e1fd4d0 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -58,7 +58,7 @@ Stage-3 Reducer 2 File Output Operator [FS_5] Group By Operator [GBY_3] (rows=1/1 width=960) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Map 1 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_2] Select Operator [SEL_1] (rows=500/500 width=10) @@ -114,13 +114,13 @@ Stage-4 Reducer 5 File Output Operator [FS_5] Group By Operator [GBY_3] (rows=1/1 width=960) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] File Output Operator [FS_19] table:{"name:":"default.src_multi2"} - Select Operator [SEL_18] (rows=1280/508 width=178) + Select Operator [SEL_18] (rows=849/508 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_26] (rows=1280/508 width=178) + Merge Join Operator [MERGEJOIN_26] (rows=849/508 width=178) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col0","_col3"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_16] @@ -158,7 +158,7 @@ Stage-4 TableScan [TS_3] (rows=25/25 width=175) Output:["key","value"] PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=1280/508 width=178) + Select Operator [SEL_1] (rows=849/508 width=178) Output:["key","value"] Please refer to the previous Select Operator [SEL_18] diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index 65c9114b20..13c19ca039 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -41,13 +41,13 @@ Stage-0 Stage-1 Reducer 2 vectorized File Output Operator [FS_8] - Select Operator [SEL_7] (rows=10 width=101) + Select Operator [SEL_7] (rows=10 width=100) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=10 width=101) + Select Operator [SEL_5] (rows=10 width=100) Output:["_col0","_col1"] - TableScan [TS_0] (rows=10 width=101) + TableScan [TS_0] (rows=10 width=100) default@acid_vectorized,acid_vectorized, ACID 
table,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"] PREHOOK: query: explain select key, value @@ -213,8 +213,8 @@ Stage-3 Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=984) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Group By Operator [GBY_2] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] Select Operator [SEL_1] (rows=500 width=178) Output:["key","value"] TableScan [TS_0] (rows=500 width=178) diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out index 0a1e039cf1..8ee4f4d716 100644 --- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -56,7 +56,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -172,7 +172,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -287,7 +287,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 19518 Data size: 156144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -399,7 +399,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 19518 Data size: 156144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -509,7 +509,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -615,7 +615,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 26150 Data size: 209200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out index 6f5a3a96ca..ed994442e2 100644 --- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out @@ -71,7 +71,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: 
hash @@ -195,7 +195,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -334,7 +334,7 @@ STAGE PLANS: 0 Map 1 2 Map 4 3 Map 5 - Statistics: Num rows: 5803 Data size: 46424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1694 Data size: 13552 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -478,7 +478,7 @@ STAGE PLANS: 0 Map 1 2 Map 4 3 Map 5 - Statistics: Num rows: 5803 Data size: 46424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1694 Data size: 13552 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -651,7 +651,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 6 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -706,7 +706,7 @@ STAGE PLANS: input vertices: 0 Map 7 2 Map 10 - Statistics: Num rows: 545 Data size: 4360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -886,7 +886,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 6 - Statistics: Num rows: 594 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -942,7 +942,7 @@ STAGE PLANS: input vertices: 0 Map 7 2 Map 10 - Statistics: Num rows: 545 Data size: 4360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -1120,7 +1120,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 46 Data size: 4094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1132,7 +1132,7 @@ STAGE PLANS: input vertices: 1 Map 5 2 Map 6 - Statistics: Num rows: 981 Data size: 7848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 204 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1304,7 +1304,7 @@ STAGE PLANS: input vertices: 0 Map 1 2 Map 4 - Statistics: Num rows: 46 Data size: 4094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Map Join Operator condition map: @@ -1317,7 +1317,7 @@ STAGE PLANS: input vertices: 1 Map 5 2 Map 6 - Statistics: Num rows: 981 Data size: 7848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 204 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out index afcae8c34c..0fb1260251 100644 --- 
a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out @@ -269,7 +269,7 @@ STAGE PLANS: keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -281,7 +281,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: [0] valueColumns: [1] - Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) Execution mode: vectorized @@ -329,7 +329,7 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -431,7 +431,7 @@ STAGE PLANS: keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -442,7 +442,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [] - Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -486,7 +486,7 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -658,7 +658,7 @@ STAGE PLANS: keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -790,7 +790,7 @@ STAGE PLANS: keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -801,7 +801,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [1] - Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -847,7 +847,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ @@ -857,7 +857,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [] - Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized @@ -882,7 +882,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/tunable_ndv.q.out b/ql/src/test/results/clientpositive/tunable_ndv.q.out index 6ae54b4927..437beafc0d 100644 --- a/ql/src/test/results/clientpositive/tunable_ndv.q.out +++ b/ql/src/test/results/clientpositive/tunable_ndv.q.out @@ -84,7 +84,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -locid int 1 4 0 5 from deserializer +locid int 1 4 0 4 from deserializer PREHOOK: query: describe formatted loc_orc_1d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d
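Note on the golden-file churn above: every statistics delta in these hunks traces back to swapping the default NDV estimator from the FM sketch to HyperLogLog, visible directly in the aggregation signature change from compute_stats(col, 16), where 16 was the FM bit-vector count, to compute_stats(col, 'hll'). For readers wondering why the distinct_count and rows=... estimates shift, the following is a minimal, self-contained dense-HLL sketch in Java; the class name MiniHll and the hash mixer in main are illustrative stand-ins, not Hive code, and this is a sketch of the standard algorithm rather than the patch's implementation.

// MiniHll: illustrative dense HyperLogLog, not the Hive classes.
public class MiniHll {
  private final int p;          // index bits; register count m = 2^p
  private final byte[] register;

  public MiniHll(int p) {
    this.p = p;
    this.register = new byte[1 << p];
  }

  public void add(long hash) {
    // low-order p bits pick the register, the remaining bits feed the rank
    int idx = (int) (hash & ((1 << p) - 1));
    long w = hash >>> p;
    byte rank = (byte) (Long.numberOfTrailingZeros(w) + 1);
    if (rank > register[idx]) {
      register[idx] = rank;     // keep the max rank seen per register
    }
  }

  public long estimate() {
    int m = register.length;
    double sum = 0;
    int zeros = 0;
    for (byte r : register) {
      sum += Math.pow(2, -r);   // harmonic-mean accumulator
      if (r == 0) zeros++;
    }
    double alpha = 0.7213 / (1 + 1.079 / m);  // bias constant for m >= 128
    double e = alpha * m * m / sum;
    if (e <= 2.5 * m && zeros > 0) {
      // small-cardinality correction: fall back to linear counting
      e = m * Math.log((double) m / zeros);
    }
    return Math.round(e);
  }

  public static void main(String[] args) {
    MiniHll hll = new MiniHll(14);
    for (int i = 0; i < 100_000; i++) {
      // stand-in 64-bit mixer (Murmur-style finalizer) in place of a real hash
      long h = i * 0x9E3779B97F4A7C15L;
      h ^= h >>> 33; h *= 0xFF51AFD7ED558CCDL; h ^= h >>> 33;
      hll.add(h);
    }
    System.out.println("estimated NDV: " + hll.estimate()); // close to 100000
  }
}

With p index bits the sketch keeps 2^p one-byte registers, so the relative error (roughly 1.04/sqrt(2^p) for standard HLL) is tuned by a single parameter independent of the data, which is why one 'hll' flag can replace the old per-query numBitVectors argument in the plans above.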