diff --git common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java deleted file mode 100644 index 160ce66..0000000 --- common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java +++ /dev/null @@ -1,361 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.common.ndv.fm; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Random; - -import javolution.util.FastBitSet; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.util.JavaDataModel; - -public class FMSketch implements NumDistinctValueEstimator{ - - static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName()); - public static final byte[] MAGIC = new byte[] { 'F', 'M' }; - - /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. - * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. - * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise - * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 - * thus introducing errors in the estimates. - */ - public static final int BIT_VECTOR_SIZE = 31; - - // Refer to Flajolet-Martin'86 for the value of phi - private static final double PHI = 0.77351; - - private final int[] a; - private final int[] b; - private final FastBitSet[] bitVector; - - private final Random aValue; - private final Random bValue; - - private int numBitVectors; - - /* Create a new distinctValueEstimator - */ - public FMSketch(int numBitVectors) { - this.numBitVectors = numBitVectors; - bitVector = new FastBitSet[numBitVectors]; - for (int i=0; i< numBitVectors; i++) { - bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); - } - - a = new int[numBitVectors]; - b = new int[numBitVectors]; - - /* Use a large prime number as a seed to the random number generator. - * Java's random number generator uses the Linear Congruential Generator to generate random - * numbers using the following recurrence relation, - * - * X(n+1) = (a X(n) + c ) mod m - * - * where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48 - * is not a prime number and hence the set of numbers from 0 to m don't form a finite field. 
- * If these numbers don't come from a finite field any give X(n) and X(n+1) may not be pair - * wise independent. - * - * However, empirically passing in prime numbers as seeds seems to work better than when passing - * composite numbers as seeds. Ideally Java's Random should pick m such that m is prime. - * - */ - aValue = new Random(99397); - bValue = new Random(9876413); - - for (int i = 0; i < numBitVectors; i++) { - int randVal; - /* a and b shouldn't be even; If a and b are even, then none of the values - * will set bit 0 thus introducing errors in the estimate. Both a and b can be even - * 25% of the times and as a result 25% of the bit vectors could be inaccurate. To avoid this - * always pick odd values for a and b. - */ - do { - randVal = aValue.nextInt(); - } while (randVal % 2 == 0); - - a[i] = randVal; - - do { - randVal = bValue.nextInt(); - } while (randVal % 2 == 0); - - b[i] = randVal; - - if (a[i] < 0) { - a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1); - } - - if (b[i] < 0) { - b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1); - } - } - } - - /** - * Resets a distinctValueEstimator object to its original state. - */ - public void reset() { - for (int i=0; i< numBitVectors; i++) { - bitVector[i].clear(); - } - } - - public FastBitSet getBitVector(int index) { - return bitVector[index]; - } - - public FastBitSet setBitVector(FastBitSet fastBitSet, int index) { - return bitVector[index] = fastBitSet; - } - - public int getnumBitVectors() { - return numBitVectors; - } - - public int getBitVectorSize() { - return BIT_VECTOR_SIZE; - } - - public void printNumDistinctValueEstimator() { - String t = new String(); - - LOG.debug("NumDistinctValueEstimator"); - LOG.debug("Number of Vectors: {}", numBitVectors); - LOG.debug("Vector Size: {}", BIT_VECTOR_SIZE); - - for (int i=0; i < numBitVectors; i++) { - t = t + bitVector[i].toString(); - } - - LOG.debug("Serialized Vectors: "); - LOG.debug(t); - } - - @Override - public String serialize() { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - // write bytes to bos ... 
- try { - FMSketchUtils.serializeFM(bos, this); - String result = Base64.encodeBase64String(bos.toByteArray()); - bos.close(); - return result; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public NumDistinctValueEstimator deserialize(String s) { - InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); - try { - NumDistinctValueEstimator n = FMSketchUtils.deserializeFM(is); - is.close(); - return n; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private int generateHash(long v, int hashNum) { - int mod = (1<> 1; - } - - // Set bitvector[index] := 1 - bitVector[i].set(index); - } - } - - public void addToEstimatorPCSA(long v) { - int hash = generateHashForPCSA(v); - int rho = hash/numBitVectors; - int index; - - // Find the index of the least significant bit that is 1 - for (index=0; index> 1; - } - - // Set bitvector[index] := 1 - bitVector[hash%numBitVectors].set(index); - } - - public void addToEstimator(double d) { - int v = new Double(d).hashCode(); - addToEstimator(v); - } - - public void addToEstimatorPCSA(double d) { - int v = new Double(d).hashCode(); - addToEstimatorPCSA(v); - } - - public void addToEstimator(HiveDecimal decimal) { - int v = decimal.hashCode(); - addToEstimator(v); - } - - public void addToEstimatorPCSA(HiveDecimal decimal) { - int v = decimal.hashCode(); - addToEstimatorPCSA(v); - } - - public void mergeEstimators(FMSketch o) { - // Bitwise OR the bitvector with the bitvector in the agg buffer - for (int i=0; i 0) { - length += model.array() * 3; // three array - length += model.primitive1() * numVector * 2; // two int array - length += (model.object() + model.array() + model.primitive1() + - model.primitive2()) * numVector; // bitset array - } - return length; - } - - public int lengthFor(JavaDataModel model) { - return lengthFor(model, getnumBitVectors()); - } - - // the caller needs to gurrantee that they are the same type based on numBitVectors - @Override - public void mergeEstimators(NumDistinctValueEstimator o) { - // Bitwise OR the bitvector with the bitvector in the agg buffer - for (int i = 0; i < numBitVectors; i++) { - bitVector[i].or(((FMSketch) o).getBitVector(i)); - } - } - - @Override - public void addToEstimator(String s) { - int v = s.hashCode(); - addToEstimator(v); - } - - @Override - public boolean canMerge(NumDistinctValueEstimator o) { - return o instanceof FMSketch && this.numBitVectors == ((FMSketch) o).numBitVectors; - } -} diff --git common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java index 4517b69..ed0db14 100644 --- common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java +++ common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java @@ -28,9 +28,9 @@ public void reset(); - public String serialize(); + public byte[] serialize(); - public NumDistinctValueEstimator deserialize(String s); + public NumDistinctValueEstimator deserialize(byte[] buf); public void addToEstimator(long v); diff --git common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java index 6a29859..6a33a32 100644 --- common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java +++ common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java @@ -19,12 +19,9 @@ package 
org.apache.hadoop.hive.common.ndv; -import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.InputStream; import java.util.Arrays; -import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.hive.common.ndv.fm.FMSketch; import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils; import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; @@ -34,22 +31,20 @@ private NumDistinctValueEstimatorFactory() { } - private static boolean isFMSketch(String s) throws IOException { - InputStream in = new ByteArrayInputStream(Base64.decodeBase64(s)); + private static boolean isFMSketch(byte[] buf) throws IOException { byte[] magic = new byte[2]; - magic[0] = (byte) in.read(); - magic[1] = (byte) in.read(); - in.close(); + magic[0] = (byte) buf[0]; + magic[1] = (byte) buf[1]; return Arrays.equals(magic, FMSketchUtils.MAGIC); } - public static NumDistinctValueEstimator getNumDistinctValueEstimator(String s) { + public static NumDistinctValueEstimator getNumDistinctValueEstimator(byte[] buf) { // Right now we assume only FM and HLL are available. try { - if (isFMSketch(s)) { - return FMSketchUtils.deserializeFM(s); + if (isFMSketch(buf)) { + return FMSketchUtils.deserializeFM(buf); } else { - return HyperLogLog.builder().build().deserialize(s); + return HyperLogLog.builder().build().deserialize(buf); } } catch (IOException e) { throw new RuntimeException(e); diff --git common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java new file mode 100644 index 0000000..957a3a1 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java @@ -0,0 +1,359 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.ndv.fm; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Random; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javolution.util.FastBitSet; + +public class FMSketch implements NumDistinctValueEstimator { + + static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName()); + + /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. + * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. + * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise + * independent. 
As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 + * thus introducing errors in the estimates. + */ + public static final int BIT_VECTOR_SIZE = 31; + + // Refer to Flajolet-Martin'86 for the value of phi + private static final double PHI = 0.77351; + + private final int[] a; + private final int[] b; + private final FastBitSet[] bitVector; + + private final Random aValue; + private final Random bValue; + + private int numBitVectors; + + /* Create a new distinctValueEstimator + */ + public FMSketch(int numBitVectors) { + this.numBitVectors = numBitVectors; + bitVector = new FastBitSet[numBitVectors]; + for (int i=0; i< numBitVectors; i++) { + bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); + } + + a = new int[numBitVectors]; + b = new int[numBitVectors]; + + /* Use a large prime number as a seed to the random number generator. + * Java's random number generator uses the Linear Congruential Generator to generate random + * numbers using the following recurrence relation, + * + * X(n+1) = (a X(n) + c ) mod m + * + * where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48 + * is not a prime number and hence the set of numbers from 0 to m don't form a finite field. + * If these numbers don't come from a finite field any give X(n) and X(n+1) may not be pair + * wise independent. + * + * However, empirically passing in prime numbers as seeds seems to work better than when passing + * composite numbers as seeds. Ideally Java's Random should pick m such that m is prime. + * + */ + aValue = new Random(99397); + bValue = new Random(9876413); + + for (int i = 0; i < numBitVectors; i++) { + int randVal; + /* a and b shouldn't be even; If a and b are even, then none of the values + * will set bit 0 thus introducing errors in the estimate. Both a and b can be even + * 25% of the times and as a result 25% of the bit vectors could be inaccurate. To avoid this + * always pick odd values for a and b. + */ + do { + randVal = aValue.nextInt(); + } while (randVal % 2 == 0); + + a[i] = randVal; + + do { + randVal = bValue.nextInt(); + } while (randVal % 2 == 0); + + b[i] = randVal; + + if (a[i] < 0) { + a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1); + } + + if (b[i] < 0) { + b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1); + } + } + } + + /** + * Resets a distinctValueEstimator object to its original state. + */ + public void reset() { + for (int i=0; i< numBitVectors; i++) { + bitVector[i].clear(); + } + } + + public FastBitSet getBitVector(int index) { + return bitVector[index]; + } + + public FastBitSet setBitVector(FastBitSet fastBitSet, int index) { + return bitVector[index] = fastBitSet; + } + + public int getnumBitVectors() { + return numBitVectors; + } + + public int getBitVectorSize() { + return BIT_VECTOR_SIZE; + } + + public void printNumDistinctValueEstimator() { + String t = new String(); + + LOG.debug("NumDistinctValueEstimator"); + LOG.debug("Number of Vectors: {}", numBitVectors); + LOG.debug("Vector Size: {}", BIT_VECTOR_SIZE); + + for (int i=0; i < numBitVectors; i++) { + t = t + bitVector[i].toString(); + } + + LOG.debug("Serialized Vectors: "); + LOG.debug(t); + } + + @Override + public byte[] serialize() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // write bytes to bos ... 
+ try { + FMSketchUtils.serializeFM(bos, this); + final byte[] result = bos.toByteArray(); + bos.close(); + return result; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public NumDistinctValueEstimator deserialize(byte[] buf) { + InputStream is = new ByteArrayInputStream(buf); + try { + NumDistinctValueEstimator n = FMSketchUtils.deserializeFM(is); + is.close(); + return n; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private int generateHash(long v, int hashNum) { + int mod = (1<> 1; + } + + // Set bitvector[index] := 1 + bitVector[i].set(index); + } + } + + public void addToEstimatorPCSA(long v) { + int hash = generateHashForPCSA(v); + int rho = hash/numBitVectors; + int index; + + // Find the index of the least significant bit that is 1 + for (index=0; index> 1; + } + + // Set bitvector[index] := 1 + bitVector[hash%numBitVectors].set(index); + } + + public void addToEstimator(double d) { + int v = new Double(d).hashCode(); + addToEstimator(v); + } + + public void addToEstimatorPCSA(double d) { + int v = new Double(d).hashCode(); + addToEstimatorPCSA(v); + } + + public void addToEstimator(HiveDecimal decimal) { + int v = decimal.hashCode(); + addToEstimator(v); + } + + public void addToEstimatorPCSA(HiveDecimal decimal) { + int v = decimal.hashCode(); + addToEstimatorPCSA(v); + } + + public void mergeEstimators(FMSketch o) { + // Bitwise OR the bitvector with the bitvector in the agg buffer + for (int i=0; i 0) { + length += model.array() * 3; // three array + length += model.primitive1() * numVector * 2; // two int array + length += (model.object() + model.array() + model.primitive1() + + model.primitive2()) * numVector; // bitset array + } + return length; + } + + public int lengthFor(JavaDataModel model) { + return lengthFor(model, getnumBitVectors()); + } + + // the caller needs to gurrantee that they are the same type based on numBitVectors + @Override + public void mergeEstimators(NumDistinctValueEstimator o) { + // Bitwise OR the bitvector with the bitvector in the agg buffer + for (int i = 0; i < numBitVectors; i++) { + bitVector[i].or(((FMSketch) o).getBitVector(i)); + } + } + + @Override + public void addToEstimator(String s) { + int v = s.hashCode(); + addToEstimator(v); + } + + @Override + public boolean canMerge(NumDistinctValueEstimator o) { + return o instanceof FMSketch && this.numBitVectors == ((FMSketch) o).numBitVectors; + } +} diff --git common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java index b6f7fdd..0ea70bc 100644 --- common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java +++ common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java @@ -23,12 +23,11 @@ import java.io.OutputStream; import java.util.Arrays; -import javolution.util.FastBitSet; - -import org.apache.commons.codec.binary.Base64; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javolution.util.FastBitSet; + public class FMSketchUtils { static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName()); @@ -78,8 +77,8 @@ private static void writeBitVector(OutputStream out, FastBitSet bit) throws IOEx * Deserializes from string to FastBitSet; Creates a NumDistinctValueEstimator * object and returns it. 
*/ - public static FMSketch deserializeFM(String s) throws IOException { - InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); + public static FMSketch deserializeFM(byte[] buf) throws IOException { + InputStream is = new ByteArrayInputStream(buf); try { FMSketch sketch = deserializeFM(is); is.close(); diff --git common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java index 182560a..b80a0ac 100644 --- common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java +++ common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java @@ -27,7 +27,6 @@ import java.util.Map; import java.util.TreeMap; -import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.util.JavaDataModel; @@ -56,10 +55,10 @@ * noBias - Use Google's bias table lookup for short range bias correction. * Enabling this will highly improve the estimation accuracy for short * range values. Default: true - * + * * */ -public class HyperLogLog implements NumDistinctValueEstimator{ +public class HyperLogLog implements NumDistinctValueEstimator { private final static int DEFAULT_HASH_BITS = 64; private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0}); private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1}); @@ -571,22 +570,26 @@ public void reset() { } @Override - public String serialize() { + public byte[] serialize() { ByteArrayOutputStream bos = new ByteArrayOutputStream(); // write bytes to bos ... try { HyperLogLogUtils.serializeHLL(bos, this); + byte[] result = bos.toByteArray(); + bos.close(); + return result; } catch (IOException e) { throw new RuntimeException(e); } - return Base64.encodeBase64String(bos.toByteArray()); } - + @Override - public NumDistinctValueEstimator deserialize(String s) { - InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); + public NumDistinctValueEstimator deserialize(byte[] buf) { + InputStream is = new ByteArrayInputStream(buf); try { - return HyperLogLogUtils.deserializeHLL(is); + HyperLogLog result = HyperLogLogUtils.deserializeHLL(is); + is.close(); + return result; } catch (IOException e) { throw new RuntimeException(e); } diff --git common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java index 74fdf58..1843441 100644 --- common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java +++ common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java @@ -86,12 +86,12 @@ public void testSerDe() throws IOException { sketch.setBitVector(fastBitSet[i], i); } assertEquals(sketch.estimateNumDistinctValues(), 3); - String s = sketch.serialize(); + byte[] buf = sketch.serialize(); FMSketch newSketch = (FMSketch) NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(s); + .getNumDistinctValueEstimator(buf); sketch.equals(newSketch); assertEquals(newSketch.estimateNumDistinctValues(), 3); - assertEquals(newSketch.serialize(), s); + assertEquals(newSketch.serialize(), buf); } } \ No newline at end of file diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index b3274ca..f26831c 100644 --- 
metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -947,7 +947,7 @@ private String extractSqlClob(Object value) { } } - static String extractSqlBlob(Object value) throws MetaException { + static byte[] extractSqlBlob(Object value) throws MetaException { if (value == null) return null; if (value instanceof Blob) { @@ -955,7 +955,7 @@ static String extractSqlBlob(Object value) throws MetaException { try { // getBytes function says: pos the ordinal position of the first byte in // the BLOB value to be extracted; the first byte is at position 1 - return new String(((Blob) value).getBytes(1, (int) ((Blob) value).length())); + return ((Blob) value).getBytes(1, (int) ((Blob) value).length()); } catch (SQLException e) { throw new MetaException("Encounter error while processing blob."); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index d53ea4c..7c8054b 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; @@ -37,7 +38,11 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.model.MPartition; import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics; import org.apache.hadoop.hive.metastore.model.MTable; @@ -76,7 +81,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setLongStats( longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, - longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null, + longStats.isSetBitVectors() ? longStats.getBitVectors() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { @@ -84,7 +89,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDoubleStats( doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, - doubleStats.isSetBitVectors() ? 
doubleStats.getBitVectors().getBytes() : null, + doubleStats.isSetBitVectors() ? doubleStats.getBitVectors() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { @@ -94,14 +99,14 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDecimalStats( decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, - decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null, + decimalStats.isSetBitVectors() ? decimalStats.getBitVectors() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, - stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null, + stringStats.isSetBitVectors() ? stringStats.getBitVectors() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { @@ -115,7 +120,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDateStats( dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, - dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null, + dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null); } @@ -226,12 +231,12 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( colStatsData.setBooleanStats(boolStats); } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) { - StringColumnStatsData stringStats = new StringColumnStatsData(); + StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); stringStats.setNumNulls(mStatsObj.getNumNulls()); stringStats.setAvgColLen(mStatsObj.getAvgColLen()); stringStats.setMaxColLen(mStatsObj.getMaxColLen()); stringStats.setNumDVs(mStatsObj.getNumDVs()); - stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : mStatsObj.getBitVector()); colStatsData.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -242,7 +247,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) { - LongColumnStatsData longStats = new LongColumnStatsData(); + LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(mStatsObj.getNumNulls()); Long longHighValue = mStatsObj.getLongHighValue(); if (longHighValue != null) { @@ -253,10 +258,10 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( longStats.setLowValue(longLowValue); } longStats.setNumDVs(mStatsObj.getNumDVs()); - longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { - DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); doubleStats.setNumNulls(mStatsObj.getNumNulls()); Double doubleHighValue = mStatsObj.getDoubleHighValue(); if (doubleHighValue != null) { @@ -267,10 +272,10 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( doubleStats.setLowValue(doubleLowValue); } doubleStats.setNumDVs(mStatsObj.getNumDVs()); - doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { - DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); decimalStats.setNumNulls(mStatsObj.getNumNulls()); String decimalHighValue = mStatsObj.getDecimalHighValue(); if (decimalHighValue != null) { @@ -281,10 +286,10 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( decimalStats.setLowValue(createThriftDecimal(decimalLowValue)); } decimalStats.setNumDVs(mStatsObj.getNumDVs()); - decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDecimalStats(decimalStats); } else if (colType.equals("date")) { - DateColumnStatsData dateStats = new DateColumnStatsData(); + DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); dateStats.setNumNulls(mStatsObj.getNumNulls()); Long highValue = mStatsObj.getLongHighValue(); if (highValue != null) { @@ -295,7 +300,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( dateStats.setLowValue(new Date(lowValue)); } dateStats.setNumDVs(mStatsObj.getNumDVs()); - dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); } statsObj.setStatsData(colStatsData); @@ -339,7 +344,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setLongStats( longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, - longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null, + longStats.isSetBitVectors() ? longStats.getBitVectors() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { @@ -347,7 +352,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDoubleStats( doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, - doubleStats.isSetBitVectors() ? doubleStats.getBitVectors().getBytes() : null, + doubleStats.isSetBitVectors() ? doubleStats.getBitVectors() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { @@ -357,14 +362,14 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDecimalStats( decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, - decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null, + decimalStats.isSetBitVectors() ? decimalStats.getBitVectors() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, - stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null, + stringStats.isSetBitVectors() ? stringStats.getBitVectors() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { @@ -378,7 +383,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDateStats( dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, - dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null, + dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null); } @@ -401,12 +406,12 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( colStatsData.setBooleanStats(boolStats); } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) { - StringColumnStatsData stringStats = new StringColumnStatsData(); + StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); stringStats.setNumNulls(mStatsObj.getNumNulls()); stringStats.setAvgColLen(mStatsObj.getAvgColLen()); stringStats.setMaxColLen(mStatsObj.getMaxColLen()); stringStats.setNumDVs(mStatsObj.getNumDVs()); - stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : new String(mStatsObj.getBitVector())); + stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -417,7 +422,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( } else if (colType.equals("tinyint") || colType.equals("smallint") || colType.equals("int") || colType.equals("bigint") || colType.equals("timestamp")) { - LongColumnStatsData longStats = new LongColumnStatsData(); + LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(mStatsObj.getNumNulls()); if (mStatsObj.getLongHighValue() != null) { longStats.setHighValue(mStatsObj.getLongHighValue()); @@ -426,10 +431,10 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( longStats.setLowValue(mStatsObj.getLongLowValue()); } longStats.setNumDVs(mStatsObj.getNumDVs()); - longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { - DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); doubleStats.setNumNulls(mStatsObj.getNumNulls()); if (mStatsObj.getDoubleHighValue() != null) { doubleStats.setHighValue(mStatsObj.getDoubleHighValue()); @@ -438,10 +443,10 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); } doubleStats.setNumDVs(mStatsObj.getNumDVs()); - doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { - DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); decimalStats.setNumNulls(mStatsObj.getNumNulls()); if (mStatsObj.getDecimalHighValue() != null) { decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue())); @@ -450,15 +455,15 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue())); } decimalStats.setNumDVs(mStatsObj.getNumDVs()); - decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); + decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDecimalStats(decimalStats); } else if (colType.equals("date")) { - DateColumnStatsData dateStats = new DateColumnStatsData(); + DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); dateStats.setNumNulls(mStatsObj.getNumNulls()); dateStats.setHighValue(new Date(mStatsObj.getLongHighValue())); dateStats.setLowValue(new Date(mStatsObj.getLongLowValue())); dateStats.setNumDVs(mStatsObj.getNumDVs()); - dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : new String(mStatsObj.getBitVector())); + dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); } statsObj.setStatsData(colStatsData); @@ -489,7 +494,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data.setBooleanStats(boolStats); } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) { - StringColumnStatsData stringStats = new StringColumnStatsData(); + StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen)); stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); @@ -505,7 +510,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) { - LongColumnStatsData longStats = new LongColumnStatsData(); + LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (lhigh != null) { longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh)); @@ -517,7 +522,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData longStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { - DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (dhigh != null) { doubleStats.setHighValue(MetaStoreDirectSql.extractSqlDouble(dhigh)); @@ -529,7 +534,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData doubleStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { - DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (dechigh != null) { decimalStats.setHighValue(createThriftDecimal((String)dechigh)); @@ -541,7 +546,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData decimalStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setDecimalStats(decimalStats); } else if (colType.equals("date")) { - DateColumnStatsData dateStats = new DateColumnStatsData(); + DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); dateStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (lhigh != null) { dateStats.setHighValue(new Date(MetaStoreDirectSql.extractSqlLong(lhigh))); @@ -570,7 +575,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data.setBooleanStats(boolStats); } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) { - StringColumnStatsData stringStats = new StringColumnStatsData(); + StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); 
stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen)); stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); @@ -584,7 +589,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) { - LongColumnStatsData longStats = new LongColumnStatsData(); + LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (lhigh != null) { longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh)); @@ -619,7 +624,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData longStats.setNumDVs(estimation); data.setLongStats(longStats); } else if (colType.equals("date")) { - DateColumnStatsData dateStats = new DateColumnStatsData(); + DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); dateStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (lhigh != null) { dateStats.setHighValue(new Date(MetaStoreDirectSql.extractSqlLong(lhigh))); @@ -654,7 +659,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData dateStats.setNumDVs(estimation); data.setDateStats(dateStats); } else if (colType.equals("double") || colType.equals("float")) { - DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (dhigh != null) { doubleStats.setHighValue(MetaStoreDirectSql.extractSqlDouble(dhigh)); @@ -681,7 +686,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData } data.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { - DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); Decimal low = null; Decimal high = null; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java index 173e06f..dfae708 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java @@ -24,11 +24,11 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; +import 
org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; public class ColumnStatsAggregatorFactory { @@ -79,19 +79,19 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col break; case LONG_STATS: - csd.setLongStats(new LongColumnStatsData()); + csd.setLongStats(new LongColumnStatsDataInspector()); break; case DATE_STATS: - csd.setDateStats(new DateColumnStatsData()); + csd.setDateStats(new DateColumnStatsDataInspector()); break; case DOUBLE_STATS: - csd.setDoubleStats(new DoubleColumnStatsData()); + csd.setDoubleStats(new DoubleColumnStatsDataInspector()); break; case STRING_STATS: - csd.setStringStats(new StringColumnStatsData()); + csd.setStringStats(new StringColumnStatsDataInspector()); break; case BINARY_STATS: @@ -99,7 +99,7 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col break; case DECIMAL_STATS: - csd.setDecimalStats(new DecimalColumnStatsData()); + csd.setDecimalStats(new DecimalColumnStatsDataInspector()); break; default: diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java index 04a1eb5..5f36bb2 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.metastore.api.Date; import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,13 +67,14 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, .getStatsData().getSetField()); } if (!cso.getStatsData().getDateStats().isSetBitVectors() - || cso.getStatsData().getDateStats().getBitVectors().length() == 0) { + || cso.getStatsData().getDateStats().getBitVectors().length == 0) { ndvEstimator = null; break; } else { // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(cso.getStatsData().getDateStats().getBitVectors()); + DateColumnStatsDataInspector dateColumnStats = + (DateColumnStatsDataInspector) cso.getStatsData().getDateStats(); + NumDistinctValueEstimator estimator = dateColumnStats.getNdvEstimator(); if (ndvEstimator == null) { ndvEstimator = estimator; } else { @@ -92,20 +94,20 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { - DateColumnStatsData aggregateData = null; + DateColumnStatsDataInspector aggregateData = null; long lowerBound = 0; long higherBound = 0; double densityAvgSum = 0.0; for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DateColumnStatsData newData = cso.getStatsData().getDateStats(); + DateColumnStatsDataInspector newData = + (DateColumnStatsDataInspector) cso.getStatsData().getDateStats(); lowerBound = Math.max(lowerBound, newData.getNumDVs()); higherBound += 
newData.getNumDVs(); densityAvgSum += (diff(newData.getHighValue(), newData.getLowValue())) / newData.getNumDVs(); if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -173,11 +175,12 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, double pseudoIndexSum = 0; int length = 0; int curIndex = -1; - DateColumnStatsData aggregateData = null; + DateColumnStatsDataInspector aggregateData = null; for (ColumnStatistics cs : css) { String partName = cs.getStatsDesc().getPartName(); ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DateColumnStatsData newData = cso.getStatsData().getDateStats(); + DateColumnStatsDataInspector newData = + (DateColumnStatsDataInspector) cso.getStatsData().getDateStats(); // newData.isSetBitVectors() should be true for sure because we // already checked it before. if (indexMap.get(partName) != curIndex) { @@ -213,8 +216,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, aggregateData.setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (length > 0) { // we have to set ndv @@ -255,7 +257,7 @@ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map adjustedIndexMap, Map adjustedStatsMap, double densityAvg) { int rightBorderInd = numParts; - DateColumnStatsData extrapolateDateData = new DateColumnStatsData(); + DateColumnStatsDataInspector extrapolateDateData = new DateColumnStatsDataInspector(); Map extractedAdjustedStatsMap = new HashMap<>(); for (Map.Entry entry : adjustedStatsMap.entrySet()) { extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDateStats()); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java index d220e7f..d166d21 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -67,13 +68,14 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, .getStatsData().getSetField()); } if (!cso.getStatsData().getDecimalStats().isSetBitVectors() - || cso.getStatsData().getDecimalStats().getBitVectors().length() == 0) { + || cso.getStatsData().getDecimalStats().getBitVectors().length == 0) { ndvEstimator = null; break; } else { // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(cso.getStatsData().getDecimalStats().getBitVectors()); + 
DecimalColumnStatsDataInspector decimalColumnStatsData = + (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); + NumDistinctValueEstimator estimator = decimalColumnStatsData.getNdvEstimator(); if (ndvEstimator == null) { ndvEstimator = estimator; } else { @@ -93,20 +95,20 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { - DecimalColumnStatsData aggregateData = null; + DecimalColumnStatsDataInspector aggregateData = null; long lowerBound = 0; long higherBound = 0; double densityAvgSum = 0.0; for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats(); + DecimalColumnStatsDataInspector newData = + (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); lowerBound = Math.max(lowerBound, newData.getNumDVs()); higherBound += newData.getNumDVs(); densityAvgSum += (MetaStoreUtils.decimalToDouble(newData.getHighValue()) - MetaStoreUtils .decimalToDouble(newData.getLowValue())) / newData.getNumDVs(); if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -184,11 +186,12 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, double pseudoIndexSum = 0; int length = 0; int curIndex = -1; - DecimalColumnStatsData aggregateData = null; + DecimalColumnStatsDataInspector aggregateData = null; for (ColumnStatistics cs : css) { String partName = cs.getStatsDesc().getPartName(); ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats(); + DecimalColumnStatsDataInspector newData = + (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); // newData.isSetBitVectors() should be true for sure because we // already checked it before. 
if (indexMap.get(partName) != curIndex) { @@ -234,8 +237,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (length > 0) { // we have to set ndv @@ -264,7 +266,7 @@ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map adjustedIndexMap, Map adjustedStatsMap, double densityAvg) { int rightBorderInd = numParts; - DecimalColumnStatsData extrapolateDecimalData = new DecimalColumnStatsData(); + DecimalColumnStatsDataInspector extrapolateDecimalData = new DecimalColumnStatsDataInspector(); Map extractedAdjustedStatsMap = new HashMap<>(); for (Map.Entry entry : adjustedStatsMap.entrySet()) { extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDecimalStats()); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java index 1b44dd9..f0be478 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,13 +66,14 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, .getStatsData().getSetField()); } if (!cso.getStatsData().getDoubleStats().isSetBitVectors() - || cso.getStatsData().getDoubleStats().getBitVectors().length() == 0) { + || cso.getStatsData().getDoubleStats().getBitVectors().length == 0) { ndvEstimator = null; break; } else { // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(cso.getStatsData().getDoubleStats().getBitVectors()); + DoubleColumnStatsDataInspector doubleColumnStatsData = + (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); + NumDistinctValueEstimator estimator = doubleColumnStatsData.getNdvEstimator(); if (ndvEstimator == null) { ndvEstimator = estimator; } else { @@ -91,19 +93,19 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { - DoubleColumnStatsData aggregateData = null; + DoubleColumnStatsDataInspector aggregateData = null; long lowerBound = 0; long higherBound = 0; double densityAvgSum = 0.0; for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); + DoubleColumnStatsDataInspector newData = + (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); lowerBound = Math.max(lowerBound, newData.getNumDVs()); higherBound += newData.getNumDVs(); densityAvgSum += 
(newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -174,7 +176,8 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, for (ColumnStatistics cs : css) { String partName = cs.getStatsDesc().getPartName(); ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); + DoubleColumnStatsDataInspector newData = + (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); // newData.isSetBitVectors() should be true for sure because we // already checked it before. if (indexMap.get(partName) != curIndex) { @@ -210,8 +213,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (length > 0) { // we have to set ndv @@ -239,7 +241,7 @@ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map adjustedIndexMap, Map adjustedStatsMap, double densityAvg) { int rightBorderInd = numParts; - DoubleColumnStatsData extrapolateDoubleData = new DoubleColumnStatsData(); + DoubleColumnStatsDataInspector extrapolateDoubleData = new DoubleColumnStatsDataInspector(); Map extractedAdjustedStatsMap = new HashMap<>(); for (Map.Entry entry : adjustedStatsMap.entrySet()) { extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDoubleStats()); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java index 802ad1a..7ff227a 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,13 +66,14 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, .getStatsData().getSetField()); } if (!cso.getStatsData().getLongStats().isSetBitVectors() - || cso.getStatsData().getLongStats().getBitVectors().length() == 0) { + || cso.getStatsData().getLongStats().getBitVectors().length == 0) { ndvEstimator = null; break; } else { // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(cso.getStatsData().getLongStats().getBitVectors()); + LongColumnStatsDataInspector longColumnStatsData = + (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); + NumDistinctValueEstimator estimator = longColumnStatsData.getNdvEstimator(); if (ndvEstimator == null) { ndvEstimator = estimator; } else { @@ -91,19 +93,19 @@ public 
ColumnStatisticsObj aggregate(String colName, List partNames, LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { - LongColumnStatsData aggregateData = null; + LongColumnStatsDataInspector aggregateData = null; long lowerBound = 0; long higherBound = 0; double densityAvgSum = 0.0; for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - LongColumnStatsData newData = cso.getStatsData().getLongStats(); + LongColumnStatsDataInspector newData = + (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); lowerBound = Math.max(lowerBound, newData.getNumDVs()); higherBound += newData.getNumDVs(); densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -171,11 +173,12 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, double pseudoIndexSum = 0; int length = 0; int curIndex = -1; - LongColumnStatsData aggregateData = null; + LongColumnStatsDataInspector aggregateData = null; for (ColumnStatistics cs : css) { String partName = cs.getStatsDesc().getPartName(); ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - LongColumnStatsData newData = cso.getStatsData().getLongStats(); + LongColumnStatsDataInspector newData = + (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); // newData.isSetBitVectors() should be true for sure because we // already checked it before. 
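As in the decimal aggregator above, when the per-partition bit vectors are missing or cannot be merged the code falls back to bounding the table-level NDV: it can be no smaller than the largest per-partition NDV (lowerBound) and no larger than their sum (higherBound). A toy illustration of that bound, kept independent of the Hive classes; the partition NDVs are invented for the example:

public class NdvBoundsSketch {
  public static void main(String[] args) {
    long[] partitionNdvs = {100, 150, 120};
    long lowerBound = 0;   // becomes the max of the per-partition NDVs
    long higherBound = 0;  // becomes the sum of the per-partition NDVs
    for (long ndv : partitionNdvs) {
      lowerBound = Math.max(lowerBound, ndv);
      higherBound += ndv;
    }
    // The true aggregate NDV lies somewhere in [150, 370]; the aggregator then picks a
    // value inside this range, guided by the per-partition value density it accumulates
    // alongside (densityAvgSum in the hunks above).
    System.out.println("NDV bounds: [" + lowerBound + ", " + higherBound + "]");
  }
}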
if (indexMap.get(partName) != curIndex) { @@ -211,8 +214,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (length > 0) { // we have to set ndv @@ -240,7 +242,7 @@ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map adjustedIndexMap, Map adjustedStatsMap, double densityAvg) { int rightBorderInd = numParts; - LongColumnStatsData extrapolateLongData = new LongColumnStatsData(); + LongColumnStatsDataInspector extrapolateLongData = new LongColumnStatsDataInspector(); Map extractedAdjustedStatsMap = new HashMap<>(); for (Map.Entry entry : adjustedStatsMap.entrySet()) { extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getLongStats()); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java index e1a781f..a5761e8 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,13 +67,14 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, .getStatsData().getSetField()); } if (!cso.getStatsData().getStringStats().isSetBitVectors() - || cso.getStatsData().getStringStats().getBitVectors().length() == 0) { + || cso.getStatsData().getStringStats().getBitVectors().length == 0) { ndvEstimator = null; break; } else { // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors()); + StringColumnStatsDataInspector stringColumnStatsData = + (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); + NumDistinctValueEstimator estimator = stringColumnStatsData.getNdvEstimator(); if (ndvEstimator == null) { ndvEstimator = estimator; } else { @@ -92,13 +94,13 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { - StringColumnStatsData aggregateData = null; + StringColumnStatsDataInspector aggregateData = null; for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsData newData = cso.getStatsData().getStringStats(); + StringColumnStatsDataInspector newData = + (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + 
ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -146,11 +148,12 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, double pseudoIndexSum = 0; int length = 0; int curIndex = -1; - StringColumnStatsData aggregateData = null; + StringColumnStatsDataInspector aggregateData = null; for (ColumnStatistics cs : css) { String partName = cs.getStatsDesc().getPartName(); ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsData newData = cso.getStatsData().getStringStats(); + StringColumnStatsDataInspector newData = + (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); // newData.isSetBitVectors() should be true for sure because we // already checked it before. if (indexMap.get(partName) != curIndex) { @@ -185,8 +188,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, newData.getMaxColLen())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (length > 0) { // we have to set ndv @@ -211,7 +213,7 @@ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map adjustedIndexMap, Map adjustedStatsMap, double densityAvg) { int rightBorderInd = numParts; - StringColumnStatsData extrapolateStringData = new StringColumnStatsData(); + StringColumnStatsDataInspector extrapolateStringData = new StringColumnStatsDataInspector(); Map extractedAdjustedStatsMap = new HashMap<>(); for (Map.Entry entry : adjustedStatsMap.entrySet()) { extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats()); @@ -295,7 +297,6 @@ public int compare(Map.Entry o1, ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); } extrapolateStringData.setAvgColLen(avgColLen); - ; extrapolateStringData.setMaxColLen((long) maxColLen); extrapolateStringData.setNumNulls(numNulls); extrapolateStringData.setNumDVs(ndv); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java new file mode 100644 index 0000000..a7c5aa5 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.columnstats.cache; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; + +@SuppressWarnings("serial") +public class DateColumnStatsDataInspector extends DateColumnStatsData { + + private NumDistinctValueEstimator ndvEstimator; + + public DateColumnStatsDataInspector() { + super(); + } + + public DateColumnStatsDataInspector(long numNulls, long numDVs) { + super(numNulls, numDVs); + } + + public DateColumnStatsDataInspector(DateColumnStatsDataInspector other) { + super(other); + if (other.ndvEstimator != null) { + super.setBitVectors(ndvEstimator.serialize()); + } + } + + @Override + public DateColumnStatsDataInspector deepCopy() { + return new DateColumnStatsDataInspector(this); + } + + @Override + public byte[] getBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.getBitVectors(); + } + + @Override + public ByteBuffer bufferForBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.bufferForBitVectors(); + } + + @Override + public void setBitVectors(byte[] bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void setBitVectors(ByteBuffer bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void unsetBitVectors() { + super.unsetBitVectors(); + this.ndvEstimator = null; + } + + @Override + public boolean isSetBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.isSetBitVectors(); + } + + @Override + public void setBitVectorsIsSet(boolean value) { + if (ndvEstimator != null) { + updateBitVectors(); + } + super.setBitVectorsIsSet(value); + } + + public NumDistinctValueEstimator getNdvEstimator() { + if (isSetBitVectors()) { + updateNdvEstimator(); + } + return ndvEstimator; + } + + public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { + super.unsetBitVectors(); + this.ndvEstimator = ndvEstimator; + } + + private void updateBitVectors() { + super.setBitVectors(ndvEstimator.serialize()); + this.ndvEstimator = null; + } + + private void updateNdvEstimator() { + this.ndvEstimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(super.getBitVectors()); + super.unsetBitVectors(); + } + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java new file mode 100644 index 0000000..4929340 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.columnstats.cache; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; + +@SuppressWarnings("serial") +public class DecimalColumnStatsDataInspector extends DecimalColumnStatsData { + + private NumDistinctValueEstimator ndvEstimator; + + public DecimalColumnStatsDataInspector() { + super(); + } + + public DecimalColumnStatsDataInspector(long numNulls, long numDVs) { + super(numNulls, numDVs); + } + + public DecimalColumnStatsDataInspector(DecimalColumnStatsDataInspector other) { + super(other); + if (other.ndvEstimator != null) { + super.setBitVectors(ndvEstimator.serialize()); + } + } + + @Override + public DecimalColumnStatsDataInspector deepCopy() { + return new DecimalColumnStatsDataInspector(this); + } + + @Override + public byte[] getBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.getBitVectors(); + } + + @Override + public ByteBuffer bufferForBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.bufferForBitVectors(); + } + + @Override + public void setBitVectors(byte[] bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void setBitVectors(ByteBuffer bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void unsetBitVectors() { + super.unsetBitVectors(); + this.ndvEstimator = null; + } + + @Override + public boolean isSetBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.isSetBitVectors(); + } + + @Override + public void setBitVectorsIsSet(boolean value) { + if (ndvEstimator != null) { + updateBitVectors(); + } + super.setBitVectorsIsSet(value); + } + + public NumDistinctValueEstimator getNdvEstimator() { + if (isSetBitVectors()) { + updateNdvEstimator(); + } + return ndvEstimator; + } + + public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { + super.unsetBitVectors(); + this.ndvEstimator = ndvEstimator; + } + + private void updateBitVectors() { + super.setBitVectors(ndvEstimator.serialize()); + this.ndvEstimator = null; + } + + private void updateNdvEstimator() { + this.ndvEstimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(super.getBitVectors()); + super.unsetBitVectors(); + } + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java new file mode 100644 index 0000000..c59598e --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.columnstats.cache; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; + +@SuppressWarnings("serial") +public class DoubleColumnStatsDataInspector extends DoubleColumnStatsData { + + private NumDistinctValueEstimator ndvEstimator; + + public DoubleColumnStatsDataInspector() { + super(); + } + + public DoubleColumnStatsDataInspector(long numNulls, long numDVs) { + super(numNulls, numDVs); + } + + public DoubleColumnStatsDataInspector(DoubleColumnStatsDataInspector other) { + super(other); + if (other.ndvEstimator != null) { + super.setBitVectors(ndvEstimator.serialize()); + } + } + + @Override + public DoubleColumnStatsDataInspector deepCopy() { + return new DoubleColumnStatsDataInspector(this); + } + + @Override + public byte[] getBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.getBitVectors(); + } + + @Override + public ByteBuffer bufferForBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.bufferForBitVectors(); + } + + @Override + public void setBitVectors(byte[] bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void setBitVectors(ByteBuffer bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void unsetBitVectors() { + super.unsetBitVectors(); + this.ndvEstimator = null; + } + + @Override + public boolean isSetBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.isSetBitVectors(); + } + + @Override + public void setBitVectorsIsSet(boolean value) { + if (ndvEstimator != null) { + updateBitVectors(); + } + super.setBitVectorsIsSet(value); + } + + public NumDistinctValueEstimator getNdvEstimator() { + if (isSetBitVectors()) { + updateNdvEstimator(); + } + return ndvEstimator; + } + + public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { + super.unsetBitVectors(); + this.ndvEstimator = ndvEstimator; + } + + private void updateBitVectors() { + super.setBitVectors(ndvEstimator.serialize()); + this.ndvEstimator = null; + } + + private void updateNdvEstimator() { + this.ndvEstimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(super.getBitVectors()); + super.unsetBitVectors(); + } + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java new file mode 100644 index 0000000..6df03ac --- /dev/null +++ 
metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.columnstats.cache; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; + +@SuppressWarnings("serial") +public class LongColumnStatsDataInspector extends LongColumnStatsData { + + private NumDistinctValueEstimator ndvEstimator; + + public LongColumnStatsDataInspector() { + super(); + } + + public LongColumnStatsDataInspector(long numNulls, long numDVs) { + super(numNulls, numDVs); + } + + public LongColumnStatsDataInspector(LongColumnStatsDataInspector other) { + super(other); + if (other.ndvEstimator != null) { + super.setBitVectors(ndvEstimator.serialize()); + } + } + + @Override + public LongColumnStatsDataInspector deepCopy() { + return new LongColumnStatsDataInspector(this); + } + + @Override + public byte[] getBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.getBitVectors(); + } + + @Override + public ByteBuffer bufferForBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.bufferForBitVectors(); + } + + @Override + public void setBitVectors(byte[] bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void setBitVectors(ByteBuffer bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void unsetBitVectors() { + super.unsetBitVectors(); + this.ndvEstimator = null; + } + + @Override + public boolean isSetBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.isSetBitVectors(); + } + + @Override + public void setBitVectorsIsSet(boolean value) { + if (ndvEstimator != null) { + updateBitVectors(); + } + super.setBitVectorsIsSet(value); + } + + public NumDistinctValueEstimator getNdvEstimator() { + if (isSetBitVectors()) { + updateNdvEstimator(); + } + return ndvEstimator; + } + + public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { + super.unsetBitVectors(); + this.ndvEstimator = ndvEstimator; + } + + private void updateBitVectors() { + super.setBitVectors(ndvEstimator.serialize()); + this.ndvEstimator = null; + } + + private void updateNdvEstimator() { + this.ndvEstimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(super.getBitVectors()); + super.unsetBitVectors(); + } + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java 
metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java new file mode 100644 index 0000000..929fc26 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.columnstats.cache; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; + +@SuppressWarnings("serial") +public class StringColumnStatsDataInspector extends StringColumnStatsData { + + private NumDistinctValueEstimator ndvEstimator; + + public StringColumnStatsDataInspector() { + super(); + } + + public StringColumnStatsDataInspector(long maxColLen, double avgColLen, + long numNulls, long numDVs) { + super(maxColLen, avgColLen, numNulls, numDVs); + } + + public StringColumnStatsDataInspector(StringColumnStatsDataInspector other) { + super(other); + if (other.ndvEstimator != null) { + super.setBitVectors(ndvEstimator.serialize()); + } + } + + @Override + public StringColumnStatsDataInspector deepCopy() { + return new StringColumnStatsDataInspector(this); + } + + @Override + public byte[] getBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.getBitVectors(); + } + + @Override + public ByteBuffer bufferForBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.bufferForBitVectors(); + } + + @Override + public void setBitVectors(byte[] bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void setBitVectors(ByteBuffer bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void unsetBitVectors() { + super.unsetBitVectors(); + this.ndvEstimator = null; + } + + @Override + public boolean isSetBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.isSetBitVectors(); + } + + @Override + public void setBitVectorsIsSet(boolean value) { + if (ndvEstimator != null) { + updateBitVectors(); + } + super.setBitVectorsIsSet(value); + } + + public NumDistinctValueEstimator getNdvEstimator() { + if (isSetBitVectors()) { + updateNdvEstimator(); + } + return ndvEstimator; + } + + public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { + super.unsetBitVectors(); + this.ndvEstimator = ndvEstimator; + } + + private void updateBitVectors() { + super.setBitVectors(ndvEstimator.serialize()); + this.ndvEstimator = null; + } + + private void updateNdvEstimator() { + this.ndvEstimator = 
NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(super.getBitVectors()); + super.unsetBitVectors(); + } + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java index 0ce1847..66be524 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java @@ -19,19 +19,16 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; public class ColumnStatsMergerFactory { @@ -89,15 +86,15 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col break; case LONG_STATS: - csd.setLongStats(new LongColumnStatsData()); + csd.setLongStats(new LongColumnStatsDataInspector()); break; case DOUBLE_STATS: - csd.setDoubleStats(new DoubleColumnStatsData()); + csd.setDoubleStats(new DoubleColumnStatsDataInspector()); break; case STRING_STATS: - csd.setStringStats(new StringColumnStatsData()); + csd.setStringStats(new StringColumnStatsDataInspector()); break; case BINARY_STATS: @@ -105,11 +102,11 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col break; case DECIMAL_STATS: - csd.setDecimalStats(new DecimalColumnStatsData()); + csd.setDecimalStats(new DecimalColumnStatsDataInspector()); break; case DATE_STATS: - csd.setDateStats(new DateColumnStatsData()); + csd.setDateStats(new DateColumnStatsDataInspector()); break; default: diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java index 2542a00..05144f2 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java @@ -20,16 +20,17 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import 
org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; public class DateColumnStatsMerger extends ColumnStatsMerger { @Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { - DateColumnStatsData aggregateData = aggregateColStats.getStatsData().getDateStats(); - DateColumnStatsData newData = newColStats.getStatsData().getDateStats(); + DateColumnStatsDataInspector aggregateData = + (DateColumnStatsDataInspector) aggregateColStats.getStatsData().getDateStats(); + DateColumnStatsDataInspector newData = + (DateColumnStatsDataInspector) newColStats.getStatsData().getDateStats(); Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData .getLowValue() : newData.getLowValue(); aggregateData.setLowValue(lowValue); @@ -37,19 +38,17 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new .getHighValue() : newData.getHighValue(); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 - || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(aggregateData.getBitVectors()); - NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors()); + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); long ndv = -1; if (oldEst.canMerge(newEst)) { oldEst.mergeEstimators(newEst); ndv = oldEst.estimateNumDistinctValues(); - aggregateData.setBitVectors(oldEst.serialize()); + aggregateData.setNdvEstimator(oldEst); } else { ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java index 4e8e129..dc28b95 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java @@ -20,16 +20,17 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Decimal; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; public class DecimalColumnStatsMerger extends ColumnStatsMerger { @Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj 
newColStats) { - DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats(); - DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats(); + DecimalColumnStatsDataInspector aggregateData = + (DecimalColumnStatsDataInspector) aggregateColStats.getStatsData().getDecimalStats(); + DecimalColumnStatsDataInspector newData = + (DecimalColumnStatsDataInspector) newColStats.getStatsData().getDecimalStats(); Decimal lowValue = aggregateData.getLowValue() != null && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData .getLowValue() : newData.getLowValue(); @@ -39,19 +40,17 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new .getHighValue() : newData.getHighValue(); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 - || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(aggregateData.getBitVectors()); - NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors()); + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); long ndv = -1; if (oldEst.canMerge(newEst)) { oldEst.mergeEstimators(newEst); ndv = oldEst.estimateNumDistinctValues(); - aggregateData.setBitVectors(oldEst.serialize()); + aggregateData.setNdvEstimator(oldEst); } else { ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java index 4ef5c39..e686e81 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java @@ -20,31 +20,30 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; public class DoubleColumnStatsMerger extends ColumnStatsMerger { @Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { - DoubleColumnStatsData aggregateData = aggregateColStats.getStatsData().getDoubleStats(); - DoubleColumnStatsData newData = newColStats.getStatsData().getDoubleStats(); + DoubleColumnStatsDataInspector aggregateData = + (DoubleColumnStatsDataInspector) aggregateColStats.getStatsData().getDoubleStats(); + DoubleColumnStatsDataInspector newData = + (DoubleColumnStatsDataInspector) newColStats.getStatsData().getDoubleStats(); aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), 
newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 - || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(aggregateData.getBitVectors()); - NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors()); + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); long ndv = -1; if (oldEst.canMerge(newEst)) { oldEst.mergeEstimators(newEst); ndv = oldEst.estimateNumDistinctValues(); - aggregateData.setBitVectors(oldEst.serialize()); + aggregateData.setNdvEstimator(oldEst); } else { ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java index acf7f03..4d42f86 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java @@ -20,31 +20,30 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; public class LongColumnStatsMerger extends ColumnStatsMerger { @Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { - LongColumnStatsData aggregateData = aggregateColStats.getStatsData().getLongStats(); - LongColumnStatsData newData = newColStats.getStatsData().getLongStats(); + LongColumnStatsDataInspector aggregateData = + (LongColumnStatsDataInspector) aggregateColStats.getStatsData().getLongStats(); + LongColumnStatsDataInspector newData = + (LongColumnStatsDataInspector) newColStats.getStatsData().getLongStats(); aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 - || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(aggregateData.getBitVectors()); - 
NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors()); + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); long ndv = -1; if (oldEst.canMerge(newEst)) { oldEst.mergeEstimators(newEst); ndv = oldEst.estimateNumDistinctValues(); - aggregateData.setBitVectors(oldEst.serialize()); + aggregateData.setNdvEstimator(oldEst); } else { ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java index b3cd33c..ec9d7a1 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java @@ -20,31 +20,30 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; public class StringColumnStatsMerger extends ColumnStatsMerger { @Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { - StringColumnStatsData aggregateData = aggregateColStats.getStatsData().getStringStats(); - StringColumnStatsData newData = newColStats.getStatsData().getStringStats(); + StringColumnStatsDataInspector aggregateData = + (StringColumnStatsDataInspector) aggregateColStats.getStatsData().getStringStats(); + StringColumnStatsDataInspector newData = + (StringColumnStatsDataInspector) newColStats.getStatsData().getStringStats(); aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length() == 0 - || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (!aggregateData.isSetBitVectors() || aggregateData.getBitVectors().length == 0 + || !newData.isSetBitVectors() || newData.getBitVectors().length == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - NumDistinctValueEstimator oldEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(aggregateData.getBitVectors()); - NumDistinctValueEstimator newEst = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors()); + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); long ndv = -1; if (oldEst.canMerge(newEst)) { oldEst.mergeEstimators(newEst); ndv = oldEst.estimateNumDistinctValues(); - aggregateData.setBitVectors(oldEst.serialize()); + aggregateData.setNdvEstimator(oldEst); } else { ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); } diff --git metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java index 
54828f2..8312b34 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java @@ -20,10 +20,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Set; import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.conf.HiveConf; @@ -35,27 +32,21 @@ import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; -import org.apache.hadoop.hive.metastore.api.Function; -import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.InvalidInputException; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.SQLForeignKey; -import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -93,7 +84,7 @@ public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) { } } - String bitVectors[] = new String[2]; + byte bitVectors[][] = new byte[2][]; @Before public void setUp() throws Exception { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index d96f432..2b2c004 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -24,10 +24,7 @@ import java.util.ArrayList; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; @@ -37,15 +34,15 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.Decimal; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import 
org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.QueryPlan; @@ -63,6 +60,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; @@ -70,6 +68,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * ColumnStatsTask implementation. @@ -136,8 +136,8 @@ private void unpackDoubleStats(ObjectInspector oi, Object o, String fName, statsObj.getStatsData().getDoubleStats().setLowValue(d); } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDoubleStats().setBitVectors(v);; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDoubleStats().setBitVectors(buf); } } @@ -157,8 +157,8 @@ private void unpackDecimalStats(ObjectInspector oi, Object o, String fName, statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setBitVectors(v);; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setBitVectors(buf); } } @@ -182,8 +182,8 @@ private void unpackLongStats(ObjectInspector oi, Object o, String fName, statsObj.getStatsData().getLongStats().setLowValue(v); } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getLongStats().setBitVectors(v);; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getLongStats().setBitVectors(buf); } } @@ -203,8 +203,8 @@ private void unpackStringStats(ObjectInspector oi, Object o, String fName, statsObj.getStatsData().getStringStats().setMaxColLen(v); } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getStringStats().setBitVectors(v);; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + 
statsObj.getStatsData().getStringStats().setBitVectors(buf); } } @@ -238,8 +238,8 @@ private void unpackDateStats(ObjectInspector oi, Object o, String fName, statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDateStats().setBitVectors(v);; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDateStats().setBitVectors(buf); } } @@ -255,15 +255,15 @@ private void unpackPrimitiveObject (ObjectInspector oi, Object o, String fieldNa ColumnStatisticsData statsData = new ColumnStatisticsData(); if (s.equalsIgnoreCase("long")) { - LongColumnStatsData longStats = new LongColumnStatsData(); + LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); statsData.setLongStats(longStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("double")) { - DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); statsData.setDoubleStats(doubleStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("string")) { - StringColumnStatsData stringStats = new StringColumnStatsData(); + StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); statsData.setStringStats(stringStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("boolean")) { @@ -275,11 +275,11 @@ private void unpackPrimitiveObject (ObjectInspector oi, Object o, String fieldNa statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("decimal")) { - DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); statsData.setDecimalStats(decimalStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("date")) { - DateColumnStatsData dateStats = new DateColumnStatsData(); + DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); statsData.setDateStats(dateStats); statsObj.setStatsData(statsData); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java index 2acc777..82fbf28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java @@ -26,8 +26,6 @@ import java.util.Map; import java.util.Map.Entry; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -35,14 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.Decimal; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import 
org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.QueryPlan; @@ -55,6 +53,8 @@ import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * ColumnStatsUpdateTask implementation. For example, ALTER TABLE src_stat @@ -101,7 +101,7 @@ private ColumnStatistics constructColumnStatsFromInput() if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint") || columnType.equalsIgnoreCase("timestamp")) { - LongColumnStatsData longStats = new LongColumnStatsData(); + LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNullsIsSet(false); longStats.setNumDVsIsSet(false); longStats.setLowValueIsSet(false); @@ -125,7 +125,7 @@ private ColumnStatistics constructColumnStatsFromInput() statsData.setLongStats(longStats); statsObj.setStatsData(statsData); } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) { - DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); doubleStats.setNumNullsIsSet(false); doubleStats.setNumDVsIsSet(false); doubleStats.setLowValueIsSet(false); @@ -150,7 +150,7 @@ private ColumnStatistics constructColumnStatsFromInput() statsObj.setStatsData(statsData); } else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char") || columnType.toLowerCase().startsWith("varchar")) { //char(x),varchar(x) types - StringColumnStatsData stringStats = new StringColumnStatsData(); + StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); stringStats.setMaxColLenIsSet(false); stringStats.setAvgColLenIsSet(false); stringStats.setNumNullsIsSet(false); @@ -216,7 +216,7 @@ private ColumnStatistics constructColumnStatsFromInput() statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); } else if (columnType.toLowerCase().startsWith("decimal")) { //decimal(a,b) type - DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); decimalStats.setNumNullsIsSet(false); decimalStats.setNumDVsIsSet(false); decimalStats.setLowValueIsSet(false); @@ -244,7 +244,7 @@ private ColumnStatistics constructColumnStatsFromInput() statsData.setDecimalStats(decimalStats); statsObj.setStatsData(statsData); } else if (columnType.equalsIgnoreCase("date")) { - DateColumnStatsData dateStats = new DateColumnStatsData(); + DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); Map mapProp = work.getMapProp(); for 
(Entry entry : mapProp.entrySet()) { String fName = entry.getKey(); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index 2380073..a065248 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; +import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; @@ -54,6 +55,7 @@ import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.Date; @@ -176,6 +178,16 @@ private static String convertToString(org.apache.hadoop.hive.metastore.api.Date return writableValue.toString(); } + private static String convertToString(byte[] buf) { + if (buf == null) { + return ""; + } + byte[] sub = new byte[2]; + sub[0] = (byte) buf[0]; + sub[1] = (byte) buf[1]; + return new String(sub); + } + private static ColumnStatisticsObj getColumnStatisticsObject(String colName, String colType, List colStats) { if (colStats != null && !colStats.isEmpty()) { @@ -700,7 +712,7 @@ private static void formatWithIndentation(String colName, String colType, String } else if (csd.isSetStringStats()) { StringColumnStatsData scsd = csd.getStringStats(); appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(), - scsd.getBitVectors() == null ? "" : scsd.getBitVectors(), scsd.getAvgColLen(), + convertToString(scsd.getBitVectors()), scsd.getAvgColLen(), scsd.getMaxColLen(), "", ""); } else if (csd.isSetBooleanStats()) { BooleanColumnStatsData bcsd = csd.getBooleanStats(); @@ -710,22 +722,26 @@ private static void formatWithIndentation(String colName, String colType, String DecimalColumnStatsData dcsd = csd.getDecimalStats(); appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(), - dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), + convertToString(dcsd.getBitVectors()), "", "", "", ""); } else if (csd.isSetDoubleStats()) { DoubleColumnStatsData dcsd = csd.getDoubleStats(); appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(), - dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", ""); + dcsd.getNumDVs(), convertToString(dcsd.getBitVectors()), + "", "", "", ""); } else if (csd.isSetLongStats()) { LongColumnStatsData lcsd = csd.getLongStats(); appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(), - lcsd.getNumDVs(), lcsd.getBitVectors() == null ? "" : lcsd.getBitVectors(), "", "", "", ""); + lcsd.getNumDVs(), convertToString(lcsd.getBitVectors()), + "", "", "", ""); } else if (csd.isSetDateStats()) { DateColumnStatsData dcsd = csd.getDateStats(); appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), - dcsd.getNumNulls(), dcsd.getNumDVs(), dcsd.getBitVectors() == null ? 
"" : dcsd.getBitVectors(), "", "", "", ""); + dcsd.getNumNulls(), dcsd.getNumDVs(), + convertToString(dcsd.getBitVectors()), + "", "", "", ""); } } else { appendColumnStats(tableInfo, "", "", "", "", "", "", "", "", ""); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 8ee41bf..f5f6307 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -20,13 +20,9 @@ import java.util.ArrayList; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.common.ndv.fm.FMSketch; -import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; @@ -40,14 +36,22 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * GenericUDAFComputeStats @@ -315,7 +319,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { protected transient LongObjectInspector countNullsFieldOI; protected transient StructField ndvField; - protected transient StringObjectInspector ndvFieldOI; + protected transient BinaryObjectInspector ndvFieldOI; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". 
@@ -354,7 +358,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc countNullsFieldOI = (LongObjectInspector) countNullsField.getFieldObjectInspector(); ndvField = soi.getStructFieldRef("bitvector"); - ndvFieldOI = (StringObjectInspector) ndvField.getFieldObjectInspector(); + ndvFieldOI = (BinaryObjectInspector) ndvField.getFieldObjectInspector(); } @@ -365,7 +369,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(getValueObjectInspector(inputOI.getTypeInfo())); foi.add(getValueObjectInspector(inputOI.getTypeInfo())); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableBinaryObjectInspector); List fname = new ArrayList(); fname.add("columnType"); @@ -377,7 +381,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc partialResult = new Object[6]; partialResult[0] = new Text(); partialResult[3] = new LongWritable(0); - partialResult[4] = new Text(); + partialResult[4] = new BytesWritable(); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -388,7 +392,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(maxFieldOI != null ? getValueObjectInspector(maxFieldOI.getTypeInfo()) : getValueObjectInspector()); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableBinaryObjectInspector); List fname = new ArrayList(); fname.add("columnType"); @@ -402,7 +406,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc result[0] = new Text(); result[3] = new LongWritable(0); result[4] = new LongWritable(0); - result[5] = new Text(); + result[5] = new BytesWritable(); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -443,7 +447,8 @@ protected Object serialize(Object[] result) { long dv = numDV != null ? 
numDV.estimateNumDistinctValues() : 0; ((LongWritable) result[4]).set(dv); if (numDV != null) { - ((Text) result[5]).set(numDV.serialize()); + byte[] buf = numDV.serialize(); + ((BytesWritable) result[5]).set(buf, 0, buf.length); } return result; } @@ -454,7 +459,8 @@ protected Object serializePartial(Object[] result) { if (numDV != null) { // Serialize numDistinctValue Estimator - ((Text) result[4]).set(numDV.serialize()); + byte[] buf = numDV.serialize(); + ((BytesWritable) result[4]).set(buf, 0, buf.length); } return result; } @@ -536,14 +542,14 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { // Merge numDistinctValue Estimators Object numDistinct = soi.getStructFieldData(partial, ndvField); - String v = ndvFieldOI.getPrimitiveJavaObject(numDistinct); + byte[] buf = ndvFieldOI.getPrimitiveJavaObject(numDistinct); - if (v != null && v.length() != 0) { + if (buf != null && buf.length != 0) { if (myagg.numDV == null) { - myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(v); + myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(buf); } else { myagg.numDV.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(v)); + .getNumDistinctValueEstimator(buf)); } } } @@ -722,7 +728,7 @@ public void reset(AggregationBuffer agg) throws HiveException { private transient LongObjectInspector countNullsFieldOI; private transient StructField ndvField; - private transient StringObjectInspector ndvFieldOI; + private transient BinaryObjectInspector ndvFieldOI; /* Output of final result of the aggregation */ @@ -755,7 +761,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc countNullsFieldOI = (LongObjectInspector) countNullsField.getFieldObjectInspector(); ndvField = soi.getStructFieldRef("bitvector"); - ndvFieldOI = (StringObjectInspector) ndvField.getFieldObjectInspector(); + ndvFieldOI = (BinaryObjectInspector) ndvField.getFieldObjectInspector(); } @@ -767,7 +773,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableBinaryObjectInspector); List fname = new ArrayList(); fname.add("columntype"); @@ -783,7 +789,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc partialResult[2] = new LongWritable(0); partialResult[3] = new LongWritable(0); partialResult[4] = new LongWritable(0); - partialResult[5] = new Text(); + partialResult[5] = new BytesWritable(); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -794,7 +800,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableBinaryObjectInspector); List fname = new ArrayList(); fname.add("columntype"); @@ -810,7 +816,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc result[2] = new DoubleWritable(0); 
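The merge hunks above and below all follow the same shape: read the partial bitvector as byte[], skip empty payloads, and either adopt or merge the deserialized estimator. A compact restatement of that pattern, assuming only the estimator calls that appear in the patch (getNumDistinctValueEstimator(byte[]) and mergeEstimators); the wrapper class and method name are illustrative only.

import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;

public class NdvMergeSketch {
  static NumDistinctValueEstimator mergeInto(NumDistinctValueEstimator current, byte[] partial) {
    if (partial == null || partial.length == 0) {
      return current;                                   // nothing to merge
    }
    NumDistinctValueEstimator other =
        NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(partial);
    if (current == null) {
      return other;                                     // first non-empty partial
    }
    current.mergeEstimators(other);
    return current;
  }
}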
result[3] = new LongWritable(0); result[4] = new LongWritable(0); - result[5] = new Text(); + result[5] = new BytesWritable(); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -918,7 +924,8 @@ public Object terminatePartial(AggregationBuffer agg) throws HiveException { ((LongWritable) partialResult[4]).set(myagg.countNulls); // Serialize numDistinctValue Estimator if (myagg.numDV != null) { - ((Text) partialResult[5]).set(myagg.numDV.serialize()); + byte[] buf = myagg.numDV.serialize(); + ((BytesWritable) partialResult[5]).set(buf, 0, buf.length); } return partialResult; } @@ -948,14 +955,14 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { // Merge numDistinctValue Estimators partialValue = soi.getStructFieldData(partial, ndvField); - String v = ndvFieldOI.getPrimitiveJavaObject(partialValue); - - if (v != null && v.length() != 0) { + byte[] buf = ndvFieldOI.getPrimitiveJavaObject(partialValue); + + if (buf != null && buf.length != 0) { if (myagg.numDV == null) { - myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(v); + myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(buf); } else { myagg.numDV.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(v)); + .getNumDistinctValueEstimator(buf)); } } } @@ -980,7 +987,8 @@ public Object terminate(AggregationBuffer agg) throws HiveException { ((LongWritable) result[3]).set(myagg.countNulls); ((LongWritable) result[4]).set(numDV); if (myagg.numDV != null) { - ((Text) result[5]).set(myagg.numDV.serialize()); + byte[] buf = myagg.numDV.serialize(); + ((BytesWritable) result[5]).set(buf, 0, buf.length); } return result; } diff --git ql/src/test/results/clientpositive/autoColumnStats_4.q.out ql/src/test/results/clientpositive/autoColumnStats_4.q.out index e844999..a0581f8 100644 --- ql/src/test/results/clientpositive/autoColumnStats_4.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -119,7 +119,7 @@ STAGE PLANS: aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -153,17 +153,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/autoColumnStats_5.q.out ql/src/test/results/clientpositive/autoColumnStats_5.q.out index 
2996397..4497498 100644 --- ql/src/test/results/clientpositive/autoColumnStats_5.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_5.q.out @@ -56,7 +56,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -65,7 +65,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -295,7 +295,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) @@ -304,7 +304,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -490,7 +490,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) @@ -499,7 +499,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/autoColumnStats_6.q.out ql/src/test/results/clientpositive/autoColumnStats_6.q.out index 1b12570..c4ab489 100644 --- ql/src/test/results/clientpositive/autoColumnStats_6.q.out +++ 
ql/src/test/results/clientpositive/autoColumnStats_6.q.out @@ -69,7 +69,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) + value expressions: _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -78,7 +78,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/autoColumnStats_7.q.out ql/src/test/results/clientpositive/autoColumnStats_7.q.out index 9e2121e..2dc9fc2 100644 --- ql/src/test/results/clientpositive/autoColumnStats_7.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_7.q.out @@ -135,7 +135,7 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') mode: partial1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -149,17 +149,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/autoColumnStats_8.q.out ql/src/test/results/clientpositive/autoColumnStats_8.q.out index cdf2082..c913d97 100644 --- ql/src/test/results/clientpositive/autoColumnStats_8.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_8.q.out @@ -116,7 +116,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto 
parallelism: false Filter Operator isSamplingPred: false @@ -177,7 +177,7 @@ STAGE PLANS: properties: column.name.delimiter , columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns.types string,string,struct,struct escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -393,7 +393,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -408,7 +408,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns.types struct:struct:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -511,7 +511,7 @@ STAGE PLANS: Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -524,7 +524,7 @@ STAGE PLANS: properties: column.name.delimiter , columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns.types string,string,struct,struct escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -534,7 +534,7 @@ STAGE PLANS: properties: column.name.delimiter , columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns.types string,string,struct,struct escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -549,7 +549,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -564,7 +564,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns.types struct:struct:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git ql/src/test/results/clientpositive/autoColumnStats_9.q.out ql/src/test/results/clientpositive/autoColumnStats_9.q.out index e32c884..f35698f 100644 --- ql/src/test/results/clientpositive/autoColumnStats_9.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_9.q.out @@ -89,7 +89,7 
@@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -145,7 +145,7 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -181,17 +181,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/char_udf1.q.out ql/src/test/results/clientpositive/char_udf1.q.out index e701d64..69d76d7 100644 --- ql/src/test/results/clientpositive/char_udf1.q.out +++ ql/src/test/results/clientpositive/char_udf1.q.out @@ -406,7 +406,7 @@ from char_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} PREHOOK: query: select min(c2), min(c4) diff --git ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out index 00e53dc..d82c922 100644 --- ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out +++ ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out @@ -111,10 +111,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/columnstats_partlvl.q.out ql/src/test/results/clientpositive/columnstats_partlvl.q.out index c0f0071..2e266a4 100644 --- ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -62,7 +62,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 2000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -71,7 +71,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), 2000.0 (type: double) + expressions: _col1 (type: struct), 2000.0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -124,7 +124,7 @@ STAGE PLANS: Map-reduce partition columns: 2000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -188,7 +188,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), 2000.0 (type: double) + expressions: _col1 (type: struct), 2000.0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -203,7 +203,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types struct:double + columns.types struct:double escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -264,7 +264,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 4000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -273,7 +273,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), 4000.0 (type: double) + expressions: _col1 (type: struct), 4000.0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -326,7 +326,7 @@ STAGE PLANS: Map-reduce partition columns: 4000.0 (type: double) Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -390,7 +390,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic 
stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), 4000.0 (type: double) + expressions: _col1 (type: struct), 4000.0 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -405,7 +405,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types struct:double + columns.types struct:double escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -466,7 +466,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: 2000.0 (type: double) Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -475,7 +475,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), 2000.0 (type: double) + expressions: _col1 (type: struct), _col2 (type: struct), 2000.0 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -555,7 +555,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -564,7 +564,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -639,20 +639,20 @@ STAGE PLANS: aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE 
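Across the regenerated golden files the planner's estimated Data size shrinks once the ndvbitvector field is typed as binary instead of string: for example 944 - 864 = 80 for the two-column plans and 1408 - 1288 = 120 for the three-column plans, i.e. 40 per ndvbitvector-bearing column in those plans, presumably because the estimated width of a binary field is smaller than that of a string field. Row counts and the rest of each plan are unchanged; only the struct type signatures and these size estimates move.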
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index 0cb4863..8c2d95e 100644 --- ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -98,7 +98,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string) Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -107,7 +107,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), 4000.0 (type: double), _col1 (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), 4000.0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -177,7 +177,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: 2000.0 (type: double), _col1 (type: string) Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) + value expressions: _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -186,7 +186,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), 2000.0 (type: double), _col1 (type: string) + expressions: _col2 (type: struct), 2000.0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -268,7 +268,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) + value expressions: _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) @@ -277,7 +277,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) + expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -357,7 +357,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -366,7 +366,7 @@ STAGE 
PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/columnstats_quoting.q.out ql/src/test/results/clientpositive/columnstats_quoting.q.out index 7e080fe..b17ce0e 100644 --- ql/src/test/results/clientpositive/columnstats_quoting.q.out +++ ql/src/test/results/clientpositive/columnstats_quoting.q.out @@ -33,20 +33,20 @@ STAGE PLANS: aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -90,20 +90,20 @@ STAGE PLANS: aggregations: compute_stats(user id, 'hll') mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/columnstats_tbllvl.q.out ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index b85c1ff..9b0ff79 100644 --- ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ 
ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -63,20 +63,20 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -115,13 +115,13 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -180,20 +180,20 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 - columns.types struct:struct:struct + columns.types struct:struct:struct escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -245,20 +245,20 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), 
compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -363,20 +363,20 @@ STAGE PLANS: aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -498,20 +498,20 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -550,13 +550,13 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -615,20 +615,20 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 - columns.types struct:struct:struct + columns.types struct:struct:struct escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -680,20 +680,20 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 
'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/compute_stats_date.q.out ql/src/test/results/clientpositive/compute_stats_date.q.out index 78d04f9..e738c25 100644 --- ql/src/test/results/clientpositive/compute_stats_date.q.out +++ ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -43,7 +43,7 @@ POSTHOOK: query: select compute_stats(fl_date, 'hll') from tab_date POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_date #### A masked pattern was here #### -{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":19,"ndvbitvector":"SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy\r\n/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw=\r\n"} +{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":19,"ndvbitvector":HLL�ê����!���]����������������Y���T��P��R������W��� �ĈN������������} PREHOOK: query: explain analyze table tab_date compute statistics for columns fl_date PREHOOK: type: QUERY @@ -69,20 +69,20 @@ STAGE PLANS: aggregations: compute_stats(fl_date, 'hll') mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 520 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator 
Tree: Group By Operator aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -111,9 +111,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tab_date # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -fl_date date 2000-11-20 2010-10-29 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy -/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw= - from deserializer +fl_date date 2000-11-20 2010-10-29 0 19 HL from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} PREHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS @@ -127,7 +125,5 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tab_date # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -fl_date date 1970-01-01 2015-01-01 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy -/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw= - from deserializer +fl_date date 1970-01-01 2015-01-01 0 19 HL from deserializer COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} diff --git ql/src/test/results/clientpositive/compute_stats_decimal.q.out ql/src/test/results/clientpositive/compute_stats_decimal.q.out index e18b989..810a8b2 100644 --- ql/src/test/results/clientpositive/compute_stats_decimal.q.out +++ ql/src/test/results/clientpositive/compute_stats_decimal.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 18) from tab_decimal POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_decimal #### A masked pattern was here #### -{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"Rk0SAB8AAAAvAAAADwAAAAcAAAAHAAAALwAAAAsAAAAXAAAALwAAAA8AAAAHAAAAAwAAAAcAAAAP\r\nAAAARwEAAA8AAAAHAAAAMwAAAA==\r\n"} +{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":FM// /G3} diff --git ql/src/test/results/clientpositive/compute_stats_double.q.out ql/src/test/results/clientpositive/compute_stats_double.q.out index d937c3a..c6a9020 100644 --- ql/src/test/results/clientpositive/compute_stats_double.q.out +++ ql/src/test/results/clientpositive/compute_stats_double.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_double POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_double #### A masked pattern was here #### -{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"Rk0QAB8AAAAHAAAAAwAAABsAAAALAAAADwEAAAsAAAAHAAAAEwAAAAcAAAAPAAAADwAAAB8AAAAH\r\nAAAAHwAAAAsAAAA=\r\n"} +{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":FM   } diff --git ql/src/test/results/clientpositive/compute_stats_empty_table.q.out 
ql/src/test/results/clientpositive/compute_stats_empty_table.q.out index 05042c9..2ce83e1 100644 --- ql/src/test/results/clientpositive/compute_stats_empty_table.q.out +++ ql/src/test/results/clientpositive/compute_stats_empty_table.q.out @@ -32,7 +32,7 @@ POSTHOOK: query: select compute_stats(b, 16) from tab_empty POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_empty #### A masked pattern was here #### -{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""} +{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":} PREHOOK: query: select compute_stats(c, 16) from tab_empty PREHOOK: type: QUERY PREHOOK: Input: default@tab_empty @@ -41,7 +41,7 @@ POSTHOOK: query: select compute_stats(c, 16) from tab_empty POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_empty #### A masked pattern was here #### -{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""} +{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":} PREHOOK: query: select compute_stats(d, 16) from tab_empty PREHOOK: type: QUERY PREHOOK: Input: default@tab_empty @@ -50,7 +50,7 @@ POSTHOOK: query: select compute_stats(d, 16) from tab_empty POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_empty #### A masked pattern was here #### -{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""} +{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":} PREHOOK: query: select compute_stats(e, 16) from tab_empty PREHOOK: type: QUERY PREHOOK: Input: default@tab_empty diff --git ql/src/test/results/clientpositive/compute_stats_long.q.out ql/src/test/results/clientpositive/compute_stats_long.q.out index 3451072..138ee3f 100644 --- ql/src/test/results/clientpositive/compute_stats_long.q.out +++ ql/src/test/results/clientpositive/compute_stats_long.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_int POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### -{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"Rk0QAA8AAAAlAAAAHwAAANcAAAAXAAAANwAAACcAAAAHAAAADwAAABsAAABnAAAADwAAAAsAAAAP\r\nAAAADwQAABcAAAA=\r\n"} +{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":FM%�7'g } diff --git ql/src/test/results/clientpositive/compute_stats_string.q.out ql/src/test/results/clientpositive/compute_stats_string.q.out index bbb2361..135ac45 100644 --- ql/src/test/results/clientpositive/compute_stats_string.q.out +++ ql/src/test/results/clientpositive/compute_stats_string.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_string POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_string #### A masked pattern was here #### -{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"Rk0QAA8AAAADAAAACwAAAAUAAAAPAAAACwAAAA8AAAALAAAAAwAAAAMAAAAXAAAAEwAAABUAAAAP\r\nAAAABwAAAAcAAAA=\r\n"} +{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":FM   } diff --git ql/src/test/results/clientpositive/constant_prop_2.q.out ql/src/test/results/clientpositive/constant_prop_2.q.out index 9305041..77a9122 100644 --- ql/src/test/results/clientpositive/constant_prop_2.q.out +++ ql/src/test/results/clientpositive/constant_prop_2.q.out @@ 
-53,7 +53,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: '2008-04-08' (type: string), '11' (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -62,7 +62,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), '11' (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index 7cb62a8..628af87 100644 --- ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out +++ ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -79,20 +79,20 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -131,13 +131,13 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false Path -> Alias: #### A 
masked pattern was here #### @@ -196,20 +196,20 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 - columns.types struct:struct:struct + columns.types struct:struct:struct escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -330,20 +330,20 @@ STAGE PLANS: aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out index f6c4237..5fd06af 100644 --- ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out +++ ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out @@ -21,20 +21,20 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 880 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/fm-sketch.q.out ql/src/test/results/clientpositive/fm-sketch.q.out index 2bd218b..9cfdac9 100644 --- ql/src/test/results/clientpositive/fm-sketch.q.out +++ ql/src/test/results/clientpositive/fm-sketch.q.out @@ -38,20 +38,20 @@ STAGE PLANS: aggregations: compute_stats(key, 'fm', 16) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -124,20 +124,20 @@ STAGE PLANS: aggregations: compute_stats(key, 'fm', 16) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/hll.q.out ql/src/test/results/clientpositive/hll.q.out index 13da130..00d48a4 100644 --- 
ql/src/test/results/clientpositive/hll.q.out +++ ql/src/test/results/clientpositive/hll.q.out @@ -38,20 +38,20 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll') mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -123,20 +123,20 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll') mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/column_table_stats.q.out ql/src/test/results/clientpositive/llap/column_table_stats.q.out index c7726fe..75d8946 100644 --- ql/src/test/results/clientpositive/llap/column_table_stats.q.out +++ ql/src/test/results/clientpositive/llap/column_table_stats.q.out @@ -84,13 +84,13 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct) auto parallelism: false Execution mode: llap LLAP IO: no inputs @@ -151,20 +151,20 @@ STAGE PLANS: 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types struct:struct + columns.types struct:struct escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -344,15 +344,15 @@ STAGE PLANS: keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -465,24 +465,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns.types struct:struct:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -761,15 +761,15 @@ STAGE PLANS: keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: 
COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -882,24 +882,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns.types struct:struct:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -1178,15 +1178,15 @@ STAGE PLANS: keys: ds (type: string), '11' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), '11' (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -1251,24 +1251,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), '11' (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data 
size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns.types struct:struct:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out index 6dff50f..c84a37d 100644 --- ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out +++ ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out @@ -85,13 +85,13 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct) auto parallelism: false Execution mode: llap LLAP IO: all inputs @@ -154,20 +154,20 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types struct:struct + columns.types struct:struct escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -341,15 +341,15 @@ STAGE PLANS: keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value 
expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: all inputs @@ -456,24 +456,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns.types struct:struct:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -746,15 +746,15 @@ STAGE PLANS: keys: ds (type: string), '11' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), '11' (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: true Execution mode: llap LLAP IO: all inputs @@ -816,24 +816,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), '11' (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column 
stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string + columns.types struct:struct:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true diff --git ql/src/test/results/clientpositive/llap/llap_stats.q.out ql/src/test/results/clientpositive/llap/llap_stats.q.out index fda614f..0f32be2 100644 --- ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -118,13 +118,13 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4660 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 4660 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 5 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -135,14 +135,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 4820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 4820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 4420 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/parallel_colstats.q.out ql/src/test/results/clientpositive/llap/parallel_colstats.q.out index 57498a6..e89bf2f 100644 --- ql/src/test/results/clientpositive/llap/parallel_colstats.q.out +++ ql/src/test/results/clientpositive/llap/parallel_colstats.q.out @@ -105,11 +105,11 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -131,11 +131,11 
@@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -143,10 +143,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -158,10 +158,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/varchar_udf1.q.out ql/src/test/results/clientpositive/llap/varchar_udf1.q.out index 023d51c..fe20d54 100644 --- ql/src/test/results/clientpositive/llap/varchar_udf1.q.out +++ ql/src/test/results/clientpositive/llap/varchar_udf1.q.out @@ -406,7 +406,7 @@ from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} PREHOOK: query: select min(c2), min(c4) diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index b8d19c5..a07c92b 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2659,11 +2659,11 @@ STAGE PLANS: aggregations: compute_stats(_col0, 16), compute_stats(_col2, 16) mode: hash 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: all inputs Map Vectorization: @@ -2684,10 +2684,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2713,7 +2713,7 @@ from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} PREHOOK: query: explain vectorization detail select min(c2), diff --git ql/src/test/results/clientpositive/parallel_colstats.q.out ql/src/test/results/clientpositive/parallel_colstats.q.out index d5bce1e..83fc14a 100644 --- ql/src/test/results/clientpositive/parallel_colstats.q.out +++ ql/src/test/results/clientpositive/parallel_colstats.q.out @@ -103,7 +103,7 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -131,7 +131,7 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -172,17 +172,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -207,17 +207,17 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/partial_column_stats.q.out ql/src/test/results/clientpositive/partial_column_stats.q.out index 452d4b6..e8b1a99 100644 --- ql/src/test/results/clientpositive/partial_column_stats.q.out +++ ql/src/test/results/clientpositive/partial_column_stats.q.out @@ -29,20 +29,20 @@ STAGE PLANS: aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out index 124a4b4..0957333 100644 --- ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out +++ 
ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out @@ -22,7 +22,7 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAABAAAAAQAAAAEAAAACAAAAAgAAAAIAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAEAAAABAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAQ\r\nAAAABAAAAAEAAAA=\r\n"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAACAAAACAAAAAQAAAAIAAAAIAAAAAQAAAABAAAAAgAAAAEAAAACAAAAAgAAAAIAAAAC\r\nAAAAAQAAAAIAAAA=\r\n"} +{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM@} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":FM} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM } PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( @@ -72,10 +72,10 @@ STAGE PLANS: aggregations: compute_stats(_col0, 'fm', 16), compute_stats(_col1, 'fm', 16), compute_stats(_col2, 'fm', 16), compute_stats(_col3, 'fm', 16) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1844 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1844 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -111,7 +111,7 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} 
+{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM@} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":FM} PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( @@ -178,10 +178,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0, 'fm', 16), compute_stats(VALUE._col3, 'fm', 16), compute_stats(VALUE._col4, 'fm', 16), compute_stats(VALUE._col5, 'fm', 16) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1844 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1844 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -217,4 +217,4 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} +{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM@} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":FM} diff --git ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index 8d94ac6..da84573 100644 --- ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -83,20 +83,20 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -135,13 +135,13 @@ STAGE PLANS: aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -196,20 +196,20 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 - columns.types struct:struct:struct + columns.types struct:struct:struct escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true @@ -327,20 +327,20 @@ STAGE PLANS: aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE 
- value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 9d47066..d8a0951 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -255,7 +255,7 @@ Stage-3 Stage-0 Reducer 2 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=1/1 width=960) + Group By Operator [GBY_3] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Map 1 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_2] diff --git ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 1764164..344f773 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -57,7 +57,7 @@ Stage-3 Stage-0 Reducer 2 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=1/1 width=960) + Group By Operator [GBY_3] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Map 1 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_2] @@ -113,7 +113,7 @@ Stage-4 Stage-1 Reducer 5 File Output Operator [FS_5] - Group By Operator [GBY_3] (rows=1/1 width=960) + Group By Operator [GBY_3] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] File Output Operator [FS_19] diff --git standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index d178f10..a4f72b6 100644 --- standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -6532,7 +6532,7 @@ uint32_t BooleanColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr break; case 4: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->bitVectors); + xfer += iprot->readBinary(this->bitVectors); this->__isset.bitVectors = true; } else { xfer += iprot->skip(ftype); @@ -6575,7 +6575,7 @@ uint32_t BooleanColumnStatsData::write(::apache::thrift::protocol::TProtocol* op if (this->__isset.bitVectors) { xfer += 
oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeBinary(this->bitVectors); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); @@ -6702,7 +6702,7 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro break; case 5: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->bitVectors); + xfer += iprot->readBinary(this->bitVectors); this->__isset.bitVectors = true; } else { xfer += iprot->skip(ftype); @@ -6749,7 +6749,7 @@ uint32_t DoubleColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr if (this->__isset.bitVectors) { xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeBinary(this->bitVectors); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); @@ -6880,7 +6880,7 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) break; case 5: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->bitVectors); + xfer += iprot->readBinary(this->bitVectors); this->__isset.bitVectors = true; } else { xfer += iprot->skip(ftype); @@ -6927,7 +6927,7 @@ uint32_t LongColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot if (this->__isset.bitVectors) { xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeBinary(this->bitVectors); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); @@ -7058,7 +7058,7 @@ uint32_t StringColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro break; case 5: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->bitVectors); + xfer += iprot->readBinary(this->bitVectors); this->__isset.bitVectors = true; } else { xfer += iprot->skip(ftype); @@ -7107,7 +7107,7 @@ uint32_t StringColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr if (this->__isset.bitVectors) { xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeBinary(this->bitVectors); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); @@ -7225,7 +7225,7 @@ uint32_t BinaryColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro break; case 4: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->bitVectors); + xfer += iprot->readBinary(this->bitVectors); this->__isset.bitVectors = true; } else { xfer += iprot->skip(ftype); @@ -7268,7 +7268,7 @@ uint32_t BinaryColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr if (this->__isset.bitVectors) { xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeBinary(this->bitVectors); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); @@ -7504,7 +7504,7 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr break; case 5: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->bitVectors); + xfer += iprot->readBinary(this->bitVectors); this->__isset.bitVectors = true; } else { xfer += iprot->skip(ftype); @@ -7551,7 +7551,7 @@ uint32_t 
DecimalColumnStatsData::write(::apache::thrift::protocol::TProtocol* op if (this->__isset.bitVectors) { xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeBinary(this->bitVectors); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); @@ -7768,7 +7768,7 @@ uint32_t DateColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) break; case 5: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->bitVectors); + xfer += iprot->readBinary(this->bitVectors); this->__isset.bitVectors = true; } else { xfer += iprot->skip(ftype); @@ -7815,7 +7815,7 @@ uint32_t DateColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot if (this->__isset.bitVectors) { xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeBinary(this->bitVectors); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java index eeb5105..cf3bd14 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java @@ -52,7 +52,7 @@ private long maxColLen; // required private double avgColLen; // required private long numNulls; // required - private String bitVectors; // optional + private ByteBuffer bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -137,7 +137,7 @@ public String getFieldName() { tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BinaryColumnStatsData.class, metaDataMap); } @@ -168,7 +168,7 @@ public BinaryColumnStatsData(BinaryColumnStatsData other) { this.avgColLen = other.avgColLen; this.numNulls = other.numNulls; if (other.isSetBitVectors()) { - this.bitVectors = other.bitVectors; + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); } } @@ -253,12 +253,21 @@ public void setNumNullsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value); } - public String getBitVectors() { - return this.bitVectors; + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? 
null : bitVectors.array(); } - public void setBitVectors(String bitVectors) { - this.bitVectors = bitVectors; + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); } public void unsetBitVectors() { @@ -306,7 +315,7 @@ public void setFieldValue(_Fields field, Object value) { if (value == null) { unsetBitVectors(); } else { - setBitVectors((String)value); + setBitVectors((ByteBuffer)value); } break; @@ -514,7 +523,7 @@ public String toString() { if (this.bitVectors == null) { sb.append("null"); } else { - sb.append(this.bitVectors); + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); } first = false; } @@ -601,7 +610,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, BinaryColumnStatsDa break; case 4: // BIT_VECTORS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -632,7 +641,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, BinaryColumnStatsD if (struct.bitVectors != null) { if (struct.isSetBitVectors()) { oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); oprot.writeFieldEnd(); } } @@ -662,7 +671,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, BinaryColumnStatsDa } oprot.writeBitSet(optionals, 1); if (struct.isSetBitVectors()) { - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); } } @@ -677,7 +686,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, BinaryColumnStatsDat struct.setNumNullsIsSet(true); BitSet incoming = iprot.readBitSet(1); if (incoming.get(0)) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java index de39d21..469dc92 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java @@ -52,7 +52,7 @@ private long numTrues; // required private long numFalses; // required private long numNulls; // required - private String bitVectors; // optional + private ByteBuffer bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -137,7 +137,7 @@ public String getFieldName() { tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BooleanColumnStatsData.class, metaDataMap); } @@ -168,7 +168,7 @@ public BooleanColumnStatsData(BooleanColumnStatsData other) { this.numFalses = other.numFalses; this.numNulls = other.numNulls; if (other.isSetBitVectors()) { - this.bitVectors = other.bitVectors; + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); } } @@ -253,12 +253,21 @@ public void setNumNullsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value); } - public String getBitVectors() { - return this.bitVectors; + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? null : bitVectors.array(); } - public void setBitVectors(String bitVectors) { - this.bitVectors = bitVectors; + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); } public void unsetBitVectors() { @@ -306,7 +315,7 @@ public void setFieldValue(_Fields field, Object value) { if (value == null) { unsetBitVectors(); } else { - setBitVectors((String)value); + setBitVectors((ByteBuffer)value); } break; @@ -514,7 +523,7 @@ public String toString() { if (this.bitVectors == null) { sb.append("null"); } else { - sb.append(this.bitVectors); + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); } first = false; } @@ -601,7 +610,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, BooleanColumnStatsD break; case 4: // BIT_VECTORS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -632,7 +641,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, BooleanColumnStats if (struct.bitVectors != null) { if (struct.isSetBitVectors()) { oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); oprot.writeFieldEnd(); } } @@ -662,7 +671,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, BooleanColumnStatsD } oprot.writeBitSet(optionals, 1); if (struct.isSetBitVectors()) { - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); } } @@ -677,7 +686,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, BooleanColumnStatsDa struct.setNumNullsIsSet(true); BitSet incoming = iprot.readBitSet(1); if (incoming.get(0)) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java index edc87a1..b6cb24e 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java @@ -54,7 +54,7 @@ private Date highValue; // optional private long numNulls; // required private long numDVs; // required - private String bitVectors; // optional + private ByteBuffer bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -143,7 +143,7 @@ public String getFieldName() { tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DateColumnStatsData.class, metaDataMap); } @@ -176,7 +176,7 @@ public DateColumnStatsData(DateColumnStatsData other) { this.numNulls = other.numNulls; this.numDVs = other.numDVs; if (other.isSetBitVectors()) { - this.bitVectors = other.bitVectors; + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); } } @@ -285,12 +285,21 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } - public String getBitVectors() { - return this.bitVectors; + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? null : bitVectors.array(); } - public void setBitVectors(String bitVectors) { - this.bitVectors = bitVectors; + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); } public void unsetBitVectors() { @@ -346,7 +355,7 @@ public void setFieldValue(_Fields field, Object value) { if (value == null) { unsetBitVectors(); } else { - setBitVectors((String)value); + setBitVectors((ByteBuffer)value); } break; @@ -599,7 +608,7 @@ public String toString() { if (this.bitVectors == null) { sb.append("null"); } else { - sb.append(this.bitVectors); + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); } first = false; } @@ -698,7 +707,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, DateColumnStatsData break; case 5: // BIT_VECTORS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -740,7 +749,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DateColumnStatsDat if (struct.bitVectors != null) { if (struct.isSetBitVectors()) { oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); oprot.writeFieldEnd(); } } @@ -781,7 +790,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, DateColumnStatsData struct.highValue.write(oprot); } if (struct.isSetBitVectors()) { - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); } } @@ -804,7 +813,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DateColumnStatsData struct.setHighValueIsSet(true); } if (incoming.get(2)) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java index ec363dc..6a30944 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java @@ -54,7 +54,7 @@ private Decimal highValue; // optional private long numNulls; // required private long numDVs; // required - private String bitVectors; // optional + private ByteBuffer bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -143,7 +143,7 @@ public String getFieldName() { tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DecimalColumnStatsData.class, metaDataMap); } @@ -176,7 +176,7 @@ public DecimalColumnStatsData(DecimalColumnStatsData other) { this.numNulls = other.numNulls; this.numDVs = other.numDVs; if (other.isSetBitVectors()) { - this.bitVectors = other.bitVectors; + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); } } @@ -285,12 +285,21 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } - public String getBitVectors() { - return this.bitVectors; + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? null : bitVectors.array(); } - public void setBitVectors(String bitVectors) { - this.bitVectors = bitVectors; + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); } public void unsetBitVectors() { @@ -346,7 +355,7 @@ public void setFieldValue(_Fields field, Object value) { if (value == null) { unsetBitVectors(); } else { - setBitVectors((String)value); + setBitVectors((ByteBuffer)value); } break; @@ -599,7 +608,7 @@ public String toString() { if (this.bitVectors == null) { sb.append("null"); } else { - sb.append(this.bitVectors); + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); } first = false; } @@ -698,7 +707,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, DecimalColumnStatsD break; case 5: // BIT_VECTORS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -740,7 +749,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DecimalColumnStats if (struct.bitVectors != null) { if (struct.isSetBitVectors()) { oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); oprot.writeFieldEnd(); } } @@ -781,7 +790,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, DecimalColumnStatsD struct.highValue.write(oprot); } if (struct.isSetBitVectors()) { - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); } } @@ -804,7 +813,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DecimalColumnStatsDa struct.setHighValueIsSet(true); } if (incoming.get(2)) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java index e3340e4..e5df337 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java @@ -54,7 +54,7 @@ private double highValue; // optional private long numNulls; // required private long numDVs; // required - private String bitVectors; // optional + private ByteBuffer bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -145,7 +145,7 @@ public String getFieldName() { tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DoubleColumnStatsData.class, metaDataMap); } @@ -174,7 +174,7 @@ public DoubleColumnStatsData(DoubleColumnStatsData other) { this.numNulls = other.numNulls; this.numDVs = other.numDVs; if (other.isSetBitVectors()) { - this.bitVectors = other.bitVectors; + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); } } @@ -283,12 +283,21 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } - public String getBitVectors() { - return this.bitVectors; + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? null : bitVectors.array(); } - public void setBitVectors(String bitVectors) { - this.bitVectors = bitVectors; + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); } public void unsetBitVectors() { @@ -344,7 +353,7 @@ public void setFieldValue(_Fields field, Object value) { if (value == null) { unsetBitVectors(); } else { - setBitVectors((String)value); + setBitVectors((ByteBuffer)value); } break; @@ -589,7 +598,7 @@ public String toString() { if (this.bitVectors == null) { sb.append("null"); } else { - sb.append(this.bitVectors); + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); } first = false; } @@ -680,7 +689,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, DoubleColumnStatsDa break; case 5: // BIT_VECTORS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -718,7 +727,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DoubleColumnStatsD if (struct.bitVectors != null) { if (struct.isSetBitVectors()) { oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); oprot.writeFieldEnd(); } } @@ -759,7 +768,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, DoubleColumnStatsDa oprot.writeDouble(struct.highValue); } if (struct.isSetBitVectors()) { - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); } } @@ -780,7 +789,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DoubleColumnStatsDat struct.setHighValueIsSet(true); } if (incoming.get(2)) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java index 4404706..56cf5b8 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java @@ -54,7 +54,7 @@ private long highValue; // optional private long numNulls; // required private long numDVs; // required - private String bitVectors; // optional + private ByteBuffer bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -145,7 +145,7 @@ public String getFieldName() { tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(LongColumnStatsData.class, metaDataMap); } @@ -174,7 +174,7 @@ public LongColumnStatsData(LongColumnStatsData other) { this.numNulls = other.numNulls; this.numDVs = other.numDVs; if (other.isSetBitVectors()) { - this.bitVectors = other.bitVectors; + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); } } @@ -283,12 +283,21 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } - public String getBitVectors() { - return this.bitVectors; + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? null : bitVectors.array(); } - public void setBitVectors(String bitVectors) { - this.bitVectors = bitVectors; + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); } public void unsetBitVectors() { @@ -344,7 +353,7 @@ public void setFieldValue(_Fields field, Object value) { if (value == null) { unsetBitVectors(); } else { - setBitVectors((String)value); + setBitVectors((ByteBuffer)value); } break; @@ -589,7 +598,7 @@ public String toString() { if (this.bitVectors == null) { sb.append("null"); } else { - sb.append(this.bitVectors); + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); } first = false; } @@ -680,7 +689,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, LongColumnStatsData break; case 5: // BIT_VECTORS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -718,7 +727,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, LongColumnStatsDat if (struct.bitVectors != null) { if (struct.isSetBitVectors()) { oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); oprot.writeFieldEnd(); } } @@ -759,7 +768,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, LongColumnStatsData oprot.writeI64(struct.highValue); } if (struct.isSetBitVectors()) { - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); } } @@ -780,7 +789,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, LongColumnStatsData struct.setHighValueIsSet(true); } if (incoming.get(2)) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java index c9afe87..52fcce4 100644 --- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java +++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java @@ -54,7 +54,7 @@ private double avgColLen; // required private long numNulls; // required private long numDVs; // required - private String bitVectors; // optional + private ByteBuffer bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
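At this point the regenerated beans for BinaryColumnStatsData, BooleanColumnStatsData, DateColumnStatsData, DecimalColumnStatsData, DoubleColumnStatsData and LongColumnStatsData all follow the same pattern: bitVectors becomes a ByteBuffer field with byte[] and ByteBuffer setters, a byte[] getter, and a bufferForBitVectors() accessor. A minimal usage sketch against the regenerated LongColumnStatsData shown above; the placeholder sketch bytes are illustrative only:

import java.nio.ByteBuffer;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class BitVectorAccessorSketch {
  public static void main(String[] args) {
    LongColumnStatsData stats = new LongColumnStatsData();
    stats.setNumNulls(0L);
    stats.setNumDVs(16L);

    // Callers now hand over the raw serialized sketch bytes instead of a Base64 string;
    // the generated setter defensively copies them into the internal ByteBuffer.
    byte[] serializedSketch = {'F', 'M', 0x10};   // placeholder bytes, for illustration only
    stats.setBitVectors(serializedSketch);

    byte[] asArray = stats.getBitVectors();            // byte[] view of the stored buffer
    ByteBuffer asBuffer = stats.bufferForBitVectors();  // copied ByteBuffer view
    System.out.println(asArray.length + " bytes, remaining=" + asBuffer.remaining());
  }
}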
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -145,7 +145,7 @@ public String getFieldName() { tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StringColumnStatsData.class, metaDataMap); } @@ -180,7 +180,7 @@ public StringColumnStatsData(StringColumnStatsData other) { this.numNulls = other.numNulls; this.numDVs = other.numDVs; if (other.isSetBitVectors()) { - this.bitVectors = other.bitVectors; + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); } } @@ -289,12 +289,21 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } - public String getBitVectors() { - return this.bitVectors; + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? null : bitVectors.array(); } - public void setBitVectors(String bitVectors) { - this.bitVectors = bitVectors; + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); } public void unsetBitVectors() { @@ -350,7 +359,7 @@ public void setFieldValue(_Fields field, Object value) { if (value == null) { unsetBitVectors(); } else { - setBitVectors((String)value); + setBitVectors((ByteBuffer)value); } break; @@ -591,7 +600,7 @@ public String toString() { if (this.bitVectors == null) { sb.append("null"); } else { - sb.append(this.bitVectors); + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); } first = false; } @@ -690,7 +699,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StringColumnStatsDa break; case 5: // BIT_VECTORS if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); @@ -724,7 +733,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, StringColumnStatsD if (struct.bitVectors != null) { if (struct.isSetBitVectors()) { oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); oprot.writeFieldEnd(); } } @@ -755,7 +764,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, StringColumnStatsDa } oprot.writeBitSet(optionals, 1); if (struct.isSetBitVectors()) { - oprot.writeString(struct.bitVectors); + oprot.writeBinary(struct.bitVectors); } } @@ -772,7 +781,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, StringColumnStatsDat struct.setNumDVsIsSet(true); BitSet incoming = iprot.readBitSet(1); if (incoming.get(0)) { - struct.bitVectors = iprot.readString(); + struct.bitVectors = iprot.readBinary(); struct.setBitVectorsIsSet(true); } } diff --git standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote old mode 100644 new mode 100755 diff --git standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index 02c5717..c67f3b0 100644 --- standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -979,7 +979,7 @@ class BooleanColumnStatsData NUMTRUES => {:type => ::Thrift::Types::I64, :name => 'numTrues'}, NUMFALSES => {:type => ::Thrift::Types::I64, :name => 'numFalses'}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, - BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} } def struct_fields; FIELDS; end @@ -1006,7 +1006,7 @@ class DoubleColumnStatsData HIGHVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'highValue', :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, - BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} } def struct_fields; FIELDS; end @@ -1032,7 +1032,7 @@ class LongColumnStatsData HIGHVALUE => {:type => 
::Thrift::Types::I64, :name => 'highValue', :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, - BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} } def struct_fields; FIELDS; end @@ -1058,7 +1058,7 @@ class StringColumnStatsData AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, - BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} } def struct_fields; FIELDS; end @@ -1084,7 +1084,7 @@ class BinaryColumnStatsData MAXCOLLEN => {:type => ::Thrift::Types::I64, :name => 'maxColLen'}, AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, - BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} } def struct_fields; FIELDS; end @@ -1131,7 +1131,7 @@ class DecimalColumnStatsData HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal, :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, - BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} } def struct_fields; FIELDS; end @@ -1174,7 +1174,7 @@ class DateColumnStatsData HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Date, :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, - BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} } def struct_fields; FIELDS; end diff --git standalone-metastore/src/main/thrift/hive_metastore.thrift standalone-metastore/src/main/thrift/hive_metastore.thrift index 042a5d8..9b0a846 100644 --- standalone-metastore/src/main/thrift/hive_metastore.thrift +++ standalone-metastore/src/main/thrift/hive_metastore.thrift @@ -384,7 +384,7 @@ struct BooleanColumnStatsData { 1: required i64 numTrues, 2: required i64 numFalses, 3: required i64 numNulls, -4: optional string bitVectors +4: optional binary bitVectors } struct DoubleColumnStatsData { @@ -392,7 +392,7 @@ struct DoubleColumnStatsData { 2: optional double highValue, 3: required i64 numNulls, 4: required i64 numDVs, -5: optional string bitVectors +5: optional binary bitVectors } struct LongColumnStatsData { @@ -400,7 +400,7 @@ struct LongColumnStatsData { 2: optional i64 highValue, 3: required i64 numNulls, 4: required i64 numDVs, -5: optional string bitVectors +5: optional binary bitVectors } struct StringColumnStatsData { @@ -408,14 +408,14 @@ struct StringColumnStatsData { 2: required double avgColLen, 3: required i64 numNulls, 4: required i64 numDVs, -5: optional 
string bitVectors +5: optional binary bitVectors } struct BinaryColumnStatsData { 1: required i64 maxColLen, 2: required double avgColLen, 3: required i64 numNulls, -4: optional string bitVectors +4: optional binary bitVectors } @@ -429,7 +429,7 @@ struct DecimalColumnStatsData { 2: optional Decimal highValue, 3: required i64 numNulls, 4: required i64 numDVs, -5: optional string bitVectors +5: optional binary bitVectors } struct Date { @@ -441,7 +441,7 @@ struct DateColumnStatsData { 2: optional Date highValue, 3: required i64 numNulls, 4: required i64 numDVs, -5: optional string bitVectors +5: optional binary bitVectors } union ColumnStatisticsData {
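The IDL change itself is small: every column-stats struct switches bitVectors from optional string to optional binary. In Thrift both types share the T_STRING wire type, which is why the regenerated C++ readers above still check ftype == T_STRING while switching from readString to readBinary. A small sketch, assuming libthrift's TMemoryBuffer and TBinaryProtocol, showing that writeString and writeBinary of the same payload produce identical bytes on the wire:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TMemoryBuffer;

public class StringVsBinaryWireSketch {
  public static void main(String[] args) throws Exception {
    String payload = "FM\u0010";   // stand-in for a serialized sketch

    TMemoryBuffer asString = new TMemoryBuffer(64);
    new TBinaryProtocol(asString).writeString(payload);

    TMemoryBuffer asBinary = new TMemoryBuffer(64);
    new TBinaryProtocol(asBinary).writeBinary(
        ByteBuffer.wrap(payload.getBytes(StandardCharsets.UTF_8)));

    // Both calls emit a 4-byte length prefix followed by the same UTF-8 bytes, so the
    // field keeps the T_STRING wire type and old and new readers remain compatible
    // at the protocol level.
    System.out.println(asString.length() == asBinary.length());
  }
}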