diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java index e810ac5487..6a29859df5 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java @@ -19,6 +19,14 @@ package org.apache.hadoop.hive.common.ndv; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.hive.common.ndv.fm.FMSketch; +import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils; import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; public class NumDistinctValueEstimatorFactory { @@ -26,11 +34,25 @@ private NumDistinctValueEstimatorFactory() { } + private static boolean isFMSketch(String s) throws IOException { + InputStream in = new ByteArrayInputStream(Base64.decodeBase64(s)); + byte[] magic = new byte[2]; + magic[0] = (byte) in.read(); + magic[1] = (byte) in.read(); + in.close(); + return Arrays.equals(magic, FMSketchUtils.MAGIC); + } + public static NumDistinctValueEstimator getNumDistinctValueEstimator(String s) { - if (s.startsWith("{")) { - return new FMSketch(s); - } else { - return HyperLogLog.builder().build().deserialize(s); + // Right now we assume only FM and HLL are available. 
+ try { + if (isFMSketch(s)) { + return FMSketchUtils.deserializeFM(s); + } else { + return HyperLogLog.builder().build().deserialize(s); + } + } catch (IOException e) { + throw new RuntimeException(e); } } diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java similarity index 80% rename from common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java rename to common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java index e20d29954a..f20054d392 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java @@ -15,22 +15,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.common.ndv; +package org.apache.hadoop.hive.common.ndv.fm; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; import java.util.Random; import javolution.util.FastBitSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.util.JavaDataModel; public class FMSketch implements NumDistinctValueEstimator{ static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName()); + public static final byte[] MAGIC = new byte[] { 'F', 'M' }; /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. @@ -38,7 +44,7 @@ * independent. 
As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 * thus introducing errors in the estimates. */ - private static final int BIT_VECTOR_SIZE = 31; + public static final int BIT_VECTOR_SIZE = 31; // Refer to Flajolet-Martin'86 for the value of phi private static final double PHI = 0.77351; @@ -111,27 +117,6 @@ public FMSketch(int numBitVectors) { } } - public FMSketch(String s, int numBitVectors) { - this.numBitVectors = numBitVectors; - FastBitSet bitVectorDeser[] = genBitSet(s, numBitVectors); - bitVector = new FastBitSet[numBitVectors]; - for(int i=0; i = '0' && c <= '9') { - String t = new String(); - t = t + c; - c = s.charAt(i); - i = i + 1; - - while (c != ',' && c!= '}') { - t = t + c; - c = s.charAt(i); - i = i + 1; - } - - int bitIndex = Integer.parseInt(t); - assert(bitIndex >= 0); - assert(vectorIndex < numBitVectors); - b[vectorIndex].set(bitIndex); - if (c == '}') { - vectorIndex = vectorIndex + 1; - } - } + @Override + public NumDistinctValueEstimator deserialize(String s) { + InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); + try { + NumDistinctValueEstimator n = FMSketchUtils.deserializeFM(is); + is.close(); + return n; + } catch (IOException e) { + throw new RuntimeException(e); } - return b; } private int generateHash(long v, int hashNum) { @@ -387,11 +337,6 @@ public int lengthFor(JavaDataModel model) { return lengthFor(model, getnumBitVectors()); } - @Override - public NumDistinctValueEstimator deserialize(String s) { - return new FMSketch(s); - } - // the caller needs to gurrantee that they are the same type based on numBitVectors @Override public void mergeEstimators(NumDistinctValueEstimator o) { diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java new file mode 100644 index 0000000000..eafc65d9cd --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java @@ 
-0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.ndv.fm;
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+import javolution.util.FastBitSet;
+
+import org.apache.commons.codec.binary.Base64;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Binary serialization and deserialization helpers for FMSketch.
+ *
+ * Stream layout (multi-byte values are written low byte first):
+ * 2 bytes - FM magic string to identify the serialized stream
+ * 2 bytes - numBitVectors
+ * 4 bytes per bit vector - bit i is set when position i of the vector is set
+ */
+public class FMSketchUtils {
+
+  static final Logger LOG = LoggerFactory.getLogger(FMSketchUtils.class);
+  public static final byte[] MAGIC = new byte[] { 'F', 'M' };
+
+  /**
+   * Serializes an FM sketch: the magic bytes, numBitVectors in two bytes,
+   * then every bit vector packed into 4 bytes.
+   *
+   * @param out stream to write to; it is not closed by this method
+   * @param fm sketch to serialize
+   * @throws IOException if writing to out fails
+   * @throws IllegalArgumentException if numBitVectors does not fit in two bytes
+   */
+  public static void serializeFM(OutputStream out, FMSketch fm) throws IOException {
+    // max of numBitVectors = 1024, 2 bytes is enough.
+    int numBitVectors = fm.getnumBitVectors();
+    if (numBitVectors < 0 || numBitVectors > 0xffff) {
+      // refuse to write a header that would silently drop the high bits
+      throw new IllegalArgumentException(
+          "numBitVectors does not fit in two bytes: " + numBitVectors);
+    }
+
+    out.write(MAGIC);
+
+    byte[] nbv = new byte[2];
+    nbv[0] = (byte) numBitVectors;
+    nbv[1] = (byte) (numBitVectors >>> 8);
+    out.write(nbv);
+
+    // original toString takes too much space
+    // we compress a fastbitset to 4 bytes
+    for (int i = 0; i < numBitVectors; i++) {
+      writeBitVector(out, fm.getBitVector(i));
+    }
+  }
+
+  /** Packs positions 0 to BIT_VECTOR_SIZE-1 of bit into an int and writes it low byte first. */
+  private static void writeBitVector(OutputStream out, FastBitSet bit) throws IOException {
+    int num = 0;
+    for (int pos = 0; pos < FMSketch.BIT_VECTOR_SIZE; pos++) {
+      if (bit.get(pos)) {
+        num |= 1 << pos;
+      }
+    }
+    byte[] packed = new byte[4];
+    for (int j = 0; j < 4; j++) {
+      packed[j] = (byte) ((num >>> (8 * j)) & 0xff);
+    }
+    out.write(packed);
+  }
+
+  /**
+   * Decodes the Base64 string and rebuilds the sketch from the decoded bytes.
+   *
+   * @param s Base64 text of a serialized FM sketch
+   * @return the deserialized sketch
+   * @throws IOException if the decoded bytes end before the sketch is complete
+   * @throws IllegalArgumentException if the bytes do not start with the FM magic
+   */
+  public static FMSketch deserializeFM(String s) throws IOException {
+    // try-with-resources closes the stream on the failure path too
+    try (InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s))) {
+      return deserializeFM(is);
+    }
+  }
+
+  /**
+   * Reads an FM sketch from its binary form; the stream is left open.
+   *
+   * @param in stream positioned at the magic bytes
+   * @return the deserialized sketch
+   * @throws IOException if the stream ends early or cannot be read
+   * @throws IllegalArgumentException if the stream does not start with the FM magic
+   */
+  public static FMSketch deserializeFM(InputStream in) throws IOException {
+    checkMagicString(in);
+
+    // numBitVectors is stored low byte first
+    int low = readByte(in);
+    int high = readByte(in);
+    int numBitVectors = low | (high << 8);
+
+    FMSketch sketch = new FMSketch(numBitVectors);
+    for (int n = 0; n < numBitVectors; n++) {
+      sketch.setBitVector(readBitVector(in), n);
+    }
+    return sketch;
+  }
+
+  /** Reads 4 bytes, low byte first, and sets the matching positions of a new bit vector. */
+  private static FastBitSet readBitVector(InputStream in) throws IOException {
+    FastBitSet fastBitSet = new FastBitSet();
+    fastBitSet.clear();
+    for (int i = 0; i < 4; i++) {
+      int b = readByte(in);
+      for (int j = 0; j < 8; j++) {
+        if ((b & (1 << j)) != 0) {
+          fastBitSet.set(j + 8 * i);
+        }
+      }
+    }
+    return fastBitSet;
+  }
+
+  /** Reads one byte as 0-255; end of stream raises EOFException instead of being decoded as 0xff. */
+  private static int readByte(InputStream in) throws IOException {
+    int b = in.read();
+    if (b < 0) {
+      throw new EOFException("Unexpected end of FMSketch stream.");
+    }
+    return b;
+  }
+
+  private static void checkMagicString(InputStream in) throws IOException {
+    byte[] magic = new byte[2];
+    magic[0] =
(byte) in.read();
+    magic[1] = (byte) in.read();
+
+    if (!Arrays.equals(magic, MAGIC)) {
+      throw new IllegalArgumentException("The input stream is not a FMSketch stream.");
+    }
+  }
+}
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index d1955468a6..182560afbe 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -277,7 +277,9 @@ public void add(long hashcode) {
   }
 
   public long estimateNumDistinctValues() {
-    return count();
+    // FMSketch treats the ndv of all nulls as 1 but hll treats the ndv as 0.
+    // To avoid divide-by-zero downstream we follow FMSketch; count() is evaluated only once.
+    return Math.max(count(), 1L);
   }
 
   public long count() {
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index df45f2cc32..852365799c 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1729,7 +1729,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         "Whether column accesses are tracked in the QueryPlan.\n" +
         "This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed."),
     HIVE_STATS_NDV_ALGO("hive.stats.ndv.algo", "hll", new PatternSet("hll", "fm"),
-        "hll and fm stand for HyperLogLog and FM-sketch, respectively for computing ndv."),
+        "hll and fm stand for HyperLogLog and FM-sketch, respectively for computing ndv."),
+    HIVE_STATS_FETCH_BITVECTOR("hive.stats.fetch.bitvector", true,
+        "Whether we fetch bitvector when we compute ndv. Users can turn it off if they want to use old schema"),
     // standard error allowed for ndv estimates for FM-sketch. A lower value indicates higher accuracy and a
     // higher compute cost.
HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0, diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java new file mode 100644 index 0000000000..74fdf58d2d --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.ndv.fm; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import javolution.util.FastBitSet; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.junit.Test; + +public class TestFMSketchSerialization { + + private FastBitSet[] deserialize(String s, int numBitVectors) { + FastBitSet[] b = new FastBitSet[numBitVectors]; + for (int j = 0; j < numBitVectors; j++) { + b[j] = new FastBitSet(FMSketch.BIT_VECTOR_SIZE); + b[j].clear(); + } + + int vectorIndex = 0; + + /* + * Parse input string to obtain the indexes that are set in the bitvector. 
+     * When a toString() is called on a FastBitSet object to serialize it, the
+     * serialization adds { and } to the beginning and end of the return String.
+     * Skip "{", "}", ",", " " in the input string.
+     */
+    for (int i = 1; i < s.length() - 1;) {
+      char c = s.charAt(i);
+      i = i + 1;
+
+      // Move on to the next bit vector
+      if (c == '}') {
+        vectorIndex = vectorIndex + 1;
+      }
+
+      // Encountered a numeric value; Extract out the entire number
+      if (c >= '0' && c <= '9') {
+        String t = new String();
+        t = t + c;
+        c = s.charAt(i);
+        i = i + 1;
+
+        while (c != ',' && c != '}') {
+          t = t + c;
+          c = s.charAt(i);
+          i = i + 1;
+        }
+
+        int bitIndex = Integer.parseInt(t);
+        assert (bitIndex >= 0);
+        assert (vectorIndex < numBitVectors);
+        b[vectorIndex].set(bitIndex);
+        if (c == '}') {
+          vectorIndex = vectorIndex + 1;
+        }
+      }
+    }
+    return b;
+  }
+
+  @Test
+  public void testSerDe() throws IOException {
+    String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
+    FastBitSet[] fastBitSet = deserialize(bitVectors, 16);
+    FMSketch sketch = new FMSketch(16);
+    for (int i = 0; i < 16; i++) {
+      sketch.setBitVector(fastBitSet[i], i);
+    }
+    assertEquals(3, sketch.estimateNumDistinctValues());
+    String s = sketch.serialize();
+    FMSketch newSketch = (FMSketch) NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(s);
+    // a bare sketch.equals(newSketch) call asserted nothing; the round trip is pinned by the two checks below
+    assertEquals(3, newSketch.estimateNumDistinctValues());
+    assertEquals(s, newSketch.serialize());
+  }
+
+}
\ No newline at end of file
diff --git a/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
new file mode 100644
index 0000000000..5819e2ca6e
--- /dev/null
+++ b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
@@ -0,0 +1 @@
+ALTER TABLE "APP"."PART_COL_STATS" ADD COLUMN "BIT_VECTOR" BLOB;
diff --git a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql index a9a532906f..f4cbba65db 100644 --- a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql +++ b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql @@ -94,7 +94,7 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767)); -CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL); +CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "BIT_VECTOR" BLOB, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL); CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255)); diff --git a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql index 30513dc882..01b6f908f5 100644 --- 
a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
@@ -2,5 +2,6 @@
 RUN '041-HIVE-16556.derby.sql';
 RUN '042-HIVE-16575.derby.sql';
 RUN '043-HIVE-16922.derby.sql';
+RUN '044-HIVE-16997.derby.sql';
 
 UPDATE "APP".VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
diff --git a/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
new file mode 100644
index 0000000000..fc2a6e0f2e
--- /dev/null
+++ b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD BIT_VECTOR varbinary(max);
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
index 1cfe2d1b2d..498d089262 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
@@ -87,6 +87,7 @@ CREATE TABLE PART_COL_STATS
     LONG_LOW_VALUE bigint NULL,
     MAX_COL_LEN bigint NULL,
     NUM_DISTINCTS bigint NULL,
+    BIT_VECTOR varbinary(max) NULL,
     NUM_FALSES bigint NULL,
     NUM_NULLS bigint NOT NULL,
     NUM_TRUES bigint NULL,
diff --git a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
index 5683254b04..21d62ae470 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE;
 :r 026-HIVE-16556.mssql.sql
 :r 027-HIVE-16575.mssql.sql
 :r 028-HIVE-16922.mssql.sql
+:r 029-HIVE-16997.mssql.sql
 
 UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
 SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE;
diff --git
a/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
new file mode 100644
index 0000000000..6b41c77c15
--- /dev/null
+++ b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD COLUMN BIT_VECTOR BLOB;
diff --git a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
index 97d881f263..31963d0309 100644
--- a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
@@ -690,6 +690,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
  `BIG_DECIMAL_HIGH_VALUE` varchar(4000) CHARACTER SET latin1 COLLATE latin1_bin,
  `NUM_NULLS` bigint(20) NOT NULL,
  `NUM_DISTINCTS` bigint(20),
+ `BIT_VECTOR` blob,
  `AVG_COL_LEN` double(53,4),
  `MAX_COL_LEN` bigint(20),
  `NUM_TRUES` bigint(20),
diff --git a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
index ba62939809..9cd3a62663 100644
--- a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' ';
 SOURCE 041-HIVE-16556.mysql.sql;
 SOURCE 042-HIVE-16575.mysql.sql;
 SOURCE 043-HIVE-16922.mysql.sql;
+SOURCE 044-HIVE-16997.mysql.sql;
 
 UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
 SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' ';
diff --git a/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
new file mode 100644
index 0000000000..0ee529fa77
--- /dev/null
+++ b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD BIT_VECTOR BLOB NULL;
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
index 8fdb552367..b636673e68 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
@@ -515,6 +515,7 @@ CREATE TABLE PART_COL_STATS (
  BIG_DECIMAL_HIGH_VALUE VARCHAR2(4000),
  NUM_NULLS NUMBER NOT NULL,
  NUM_DISTINCTS NUMBER,
+ BIT_VECTOR BLOB,
  AVG_COL_LEN NUMBER,
  MAX_COL_LEN NUMBER,
  NUM_TRUES NUMBER,
diff --git a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
index 0a70d47cca..6a266498b5 100644
--- a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status from dual;
 @041-HIVE-16556.oracle.sql;
 @042-HIVE-16575.oracle.sql;
 @043-HIVE-16922.oracle.sql;
+@044-HIVE-16997.oracle.sql;
 
 UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
 SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status from dual;
diff --git a/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
new file mode 100644
index 0000000000..86003cf2fa
--- /dev/null
+++ b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
@@ -0,0 +1 @@
+ALTER TABLE "PART_COL_STATS" ADD COLUMN "BIT_VECTOR" VARCHAR(16400);
diff --git a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
index 1cdeb6b45a..eda2e40c71 100644
--- a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
@@ -534,6 +534,7 @@ CREATE TABLE "PART_COL_STATS" (
"BIG_DECIMAL_HIGH_VALUE" character varying(4000) DEFAULT NULL::character varying, "NUM_NULLS" bigint NOT NULL, "NUM_DISTINCTS" bigint, + "BIT_VECTOR" character varying(16400) DEFAULT NULL::character varying, "AVG_COL_LEN" double precision, "MAX_COL_LEN" bigint, "NUM_TRUES" bigint, diff --git a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql index c44dd067fc..ee5a673a72 100644 --- a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql +++ b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql @@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0'; \i 040-HIVE-16556.postgres.sql; \i 041-HIVE-16575.postgres.sql; \i 042-HIVE-16922.postgres.sql; +\i 043-HIVE-16997.postgres.sql; UPDATE "VERSION" SET "SCHEMA_VERSION"='3.0.0', "VERSION_COMMENT"='Hive release version 3.0.0' where "VER_ID"=1; SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0'; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index a960b2d26b..8eae30e915 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -21,6 +21,7 @@ import static org.apache.commons.lang.StringUtils.join; import static org.apache.commons.lang.StringUtils.repeat; +import java.sql.Blob; import java.sql.Clob; import java.sql.Connection; import java.sql.Statement; @@ -33,6 +34,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.TreeMap; import javax.jdo.PersistenceManager; @@ -64,6 +66,8 @@ import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import 
org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory;
 import org.apache.hadoop.hive.metastore.model.MConstraint;
 import org.apache.hadoop.hive.metastore.model.MDatabase;
 import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics;
@@ -941,6 +945,39 @@ private String extractSqlClob(Object value) {
     }
   }
 
+  /**
+   * Converts a column value that is expected to hold a SQL BLOB into a string.
+   *
+   * @param value the raw column value; may be null
+   * @return the column bytes as a string, or null when value is null or is not binary data
+   * @throws MetaException if the BLOB cannot be read
+   */
+  static String extractSqlBlob(Object value) throws MetaException {
+    if (value == null) {
+      return null;
+    }
+    if (value instanceof Blob) {
+      Blob blob = (Blob) value;
+      try {
+        // JDBC BLOB positions are 1-based
+        return new String(blob.getBytes(1, (int) blob.length()));
+      } catch (SQLException e) {
+        MetaException me =
+            new MetaException("Encounter error while processing blob.");
+        // keep the SQLException as the cause so the root failure is not lost
+        me.initCause(e);
+        throw me;
+      }
+    }
+    if (value instanceof byte[]) {
+      // NOTE(review): some JDBC drivers are expected to hand back BLOB columns as byte[] - confirm
+      return new String((byte[]) value);
+    }
+    // this may happen when enablebitvector is false
+    LOG.debug("Expected blob type but got " + value.getClass().getName());
+    return null;
+  }
+
   private static String trimCommaList(StringBuilder sb) {
     if (sb.length() > 0) {
       sb.setLength(sb.length() - 1);
@@ -1221,12 +1258,12 @@ public void visit(LeafNode node) throws MetaException {
    * @throws MetaException
    */
   public ColumnStatistics getTableStats(final String dbName, final String tableName,
-      List colNames) throws MetaException {
+      List colNames, boolean enableBitVector) throws MetaException {
     if (colNames == null || colNames.isEmpty()) {
       return null;
     }
     final boolean doTrace = LOG.isDebugEnabled();
-    final String queryText0 = "select " + STATS_COLLIST + " from " + TAB_COL_STATS + " "
+    final String queryText0 = "select " + getStatsList(enableBitVector) + " from " + TAB_COL_STATS + " "
           + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?
and \"COLUMN_NAME\" in ("; Batchable b = new Batchable() { public List run(List input) throws MetaException { @@ -1260,8 +1280,8 @@ public ColumnStatistics getTableStats(final String dbName, final String tableNam } public AggrStats aggrColStatsForPartitions(String dbName, String tableName, - List partNames, List colNames, boolean useDensityFunctionForNDVEstimation, double ndvTuner) - throws MetaException { + List partNames, List colNames, boolean useDensityFunctionForNDVEstimation, + double ndvTuner, boolean enableBitVector) throws MetaException { if (colNames.isEmpty() || partNames.isEmpty()) { LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval"); return new AggrStats(Collections.emptyList(), 0); // Nothing to aggregate @@ -1295,7 +1315,7 @@ public AggrStats aggrColStatsForPartitions(String dbName, String tableName, // Read aggregated stats for one column colStatsAggrFromDB = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNamesForDB, - partsFound, useDensityFunctionForNDVEstimation, ndvTuner); + partsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector); if (!colStatsAggrFromDB.isEmpty()) { ColumnStatisticsObj colStatsAggr = colStatsAggrFromDB.get(0); colStatsList.add(colStatsAggr); @@ -1308,7 +1328,7 @@ public AggrStats aggrColStatsForPartitions(String dbName, String tableName, partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames); colStatsList = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNames, partsFound, - useDensityFunctionForNDVEstimation, ndvTuner); + useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector); } LOG.info("useDensityFunctionForNDVEstimation = " + useDensityFunctionForNDVEstimation + "\npartsFound = " + partsFound + "\nColumnStatisticsObj = " @@ -1371,14 +1391,14 @@ private long partsFoundForPartitions(final String dbName, final String tableName private List columnStatisticsObjForPartitions(final String dbName, final String 
tableName, final List partNames, List colNames, long partsFound, - final boolean useDensityFunctionForNDVEstimation, final double ndvTuner) throws MetaException { + final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException { final boolean areAllPartsFound = (partsFound == partNames.size()); return runBatched(colNames, new Batchable() { public List run(final List inputColNames) throws MetaException { return runBatched(partNames, new Batchable() { public List run(List inputPartNames) throws MetaException { return columnStatisticsObjForPartitionsBatch(dbName, tableName, inputPartNames, - inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner); + inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector); } }); } @@ -1388,14 +1408,10 @@ private long partsFoundForPartitions(final String dbName, final String tableName // Get aggregated column stats for a table per partition for all columns in the partition // This is primarily used to populate stats object when using CachedStore (Check CachedStore#prewarm) public Map> getColStatsForTablePartitions(String dbName, - String tblName) throws MetaException { - String queryText = - "select \"PARTITION_NAME\", \"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", " - + "\"LONG_HIGH_VALUE\", \"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", " - + "\"BIG_DECIMAL_LOW_VALUE\", \"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", " - + "\"NUM_DISTINCTS\", \"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\"" - + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" - + " order by \"PARTITION_NAME\""; + String tblName, boolean enableBitVector) throws MetaException { + String queryText = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from " + + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" 
+ + " order by \"PARTITION_NAME\""; long start = 0; long end = 0; Query query = null; @@ -1446,6 +1462,28 @@ private long partsFoundForPartitions(final String dbName, final String tableName /** Should be called with the list short enough to not trip up Oracle/etc. */ private List columnStatisticsObjForPartitionsBatch(String dbName, String tableName, List partNames, List colNames, boolean areAllPartsFound, + boolean useDensityFunctionForNDVEstimation, double ndvTuner, boolean enableBitVector) throws MetaException { + if(enableBitVector) { + return aggrStatsUseJava(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner); + } + else { + return aggrStatsUseDB(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner); + } + } + + private List aggrStatsUseJava(String dbName, String tableName, + List partNames, List colNames, boolean areAllPartsFound, + boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException { + // 1. get all the stats for colNames in partNames; + List partStats = getPartitionStats(dbName, tableName, partNames, colNames, + true); + // 2. use util function to aggr stats + return MetaStoreUtils.aggrPartitionStats(partStats, dbName, tableName, partNames, colNames, + areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner); + } + + private List aggrStatsUseDB(String dbName, + String tableName, List partNames, List colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException { // TODO: all the extrapolation logic should be moved out of this class, // only mechanical data retrieval should remain here. 
@@ -1717,10 +1755,10 @@ private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaExcept ColumnStatisticsData data = new ColumnStatisticsData(); ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data); Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++], - declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], + declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], bitVector = row[i++], avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++]; StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, - llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses); + llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, bitVector, avglen, maxlen, trues, falses); return cso; } @@ -1753,14 +1791,14 @@ private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i, } public List getPartitionStats(final String dbName, final String tableName, - final List partNames, List colNames) throws MetaException { + final List partNames, List colNames, boolean enableBitVector) throws MetaException { if (colNames.isEmpty() || partNames.isEmpty()) { return Collections.emptyList(); } final boolean doTrace = LOG.isDebugEnabled(); - final String queryText0 = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from " - + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\"" - + " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\""; + final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from " + + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? 
and \"COLUMN_NAME\"" + + " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\""; Batchable b = new Batchable() { public List run(final List inputColNames) throws MetaException { Batchable b2 = new Batchable() { @@ -1812,11 +1850,13 @@ private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i, } /** The common query part for table and partition stats */ - private static final String STATS_COLLIST = - "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", " - + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", " - + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"AVG_COL_LEN\", " - + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" "; + private final String getStatsList(boolean enableBitVector) { + return "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", " + + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", " + + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", " + + (enableBitVector ? "\"BIT_VECTOR\", " : "\'\', ") + "\"AVG_COL_LEN\", " + + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" "; + } private ColumnStatistics makeColumnStats( List list, ColumnStatisticsDesc csd, int offset) throws MetaException { @@ -1826,7 +1866,7 @@ private ColumnStatistics makeColumnStats( for (Object[] row : list) { // LastAnalyzed is stored per column but thrift has it per several; // get the lowest for now as nobody actually uses this field. 
- Object laObj = row[offset + 14]; + Object laObj = row[offset + 15]; if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) { csd.setLastAnalyzed(extractSqlLong(laObj)); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index b52c94c9fb..edfbf3a6b0 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -71,8 +71,10 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; @@ -1936,7 +1938,7 @@ public static MetaException newMetaException(String errorMessage, Exception e) { } return metaException; } - + public static List getColumnNames(List schema) { List cols = new ArrayList<>(schema.size()); for (FieldSchema fs : schema) { @@ -1945,4 +1947,45 @@ public static MetaException newMetaException(String errorMessage, Exception e) { return cols; } + // given a list of partStats, this function will give you an aggr stats + public static List aggrPartitionStats(List partStats, + String dbName, String tableName, List partNames, 
List colNames, + boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) + throws MetaException { + // 1. group by the stats by colNames + // map the colName to List + Map> map = new HashMap<>(); + for (ColumnStatistics css : partStats) { + List objs = css.getStatsObj(); + for (ColumnStatisticsObj obj : objs) { + List singleObj = new ArrayList<>(); + singleObj.add(obj); + ColumnStatistics singleCS = new ColumnStatistics(css.getStatsDesc(), singleObj); + if (!map.containsKey(obj.getColName())) { + map.put(obj.getColName(), new ArrayList()); + } + map.get(obj.getColName()).add(singleCS); + } + } + return aggrPartitionStats(map,dbName,tableName,partNames,colNames,areAllPartsFound,useDensityFunctionForNDVEstimation, ndvTuner); + } + + public static List aggrPartitionStats( + Map> map, String dbName, String tableName, + List partNames, List colNames, boolean areAllPartsFound, + boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException { + List colStats = new ArrayList<>(); + // 2. 
aggr stats for each colName + // TODO: thread pool can be used to speed up the process + for (Entry> entry : map.entrySet()) { + List css = entry.getValue(); + ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css + .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(), + useDensityFunctionForNDVEstimation, ndvTuner); + ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css); + colStats.add(statsObj); + } + return colStats; + } + } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index db4ec91cdb..eea12291e7 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -7193,11 +7193,13 @@ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName protected ColumnStatistics getTableColumnStatisticsInternal( String dbName, String tableName, final List colNames, boolean allowSql, boolean allowJdo) throws MetaException, NoSuchObjectException { + final boolean enableBitVector = HiveConf.getBoolVar(getConf(), + HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR); return new GetStatHelper(HiveStringUtils.normalizeIdentifier(dbName), HiveStringUtils.normalizeIdentifier(tableName), allowSql, allowJdo) { @Override protected ColumnStatistics getSqlResult(GetHelper ctx) throws MetaException { - return directSql.getTableStats(dbName, tblName, colNames); + return directSql.getTableStats(dbName, tblName, colNames, enableBitVector); } @Override protected ColumnStatistics getJdoResult( @@ -7215,7 +7217,7 @@ protected ColumnStatistics getJdoResult( if (desc.getLastAnalyzed() > mStat.getLastAnalyzed()) { desc.setLastAnalyzed(mStat.getLastAnalyzed()); } - statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat)); + 
statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat, enableBitVector)); Deadline.checkTimeout(); } return new ColumnStatistics(desc, statObjs); @@ -7236,11 +7238,13 @@ protected ColumnStatistics getJdoResult( protected List getPartitionColumnStatisticsInternal( String dbName, String tableName, final List partNames, final List colNames, boolean allowSql, boolean allowJdo) throws MetaException, NoSuchObjectException { + final boolean enableBitVector = HiveConf.getBoolVar(getConf(), + HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR); return new GetListHelper(dbName, tableName, allowSql, allowJdo) { @Override protected List getSqlResult( GetHelper> ctx) throws MetaException { - return directSql.getPartitionStats(dbName, tblName, partNames, colNames); + return directSql.getPartitionStats(dbName, tblName, partNames, colNames, enableBitVector); } @Override protected List getJdoResult( @@ -7268,7 +7272,7 @@ protected ColumnStatistics getJdoResult( csd = StatObjectConverter.getPartitionColumnStatisticsDesc(mStatsObj); curList = new ArrayList(colNames.size()); } - curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj)); + curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj, enableBitVector)); lastPartName = partName; Deadline.checkTimeout(); } @@ -7288,12 +7292,14 @@ public AggrStats get_aggr_stats_for(String dbName, String tblName, HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); final double ndvTuner = HiveConf.getFloatVar(getConf(), HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER); + final boolean enableBitVector = HiveConf.getBoolVar(getConf(), + HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR); return new GetHelper(dbName, tblName, true, false) { @Override protected AggrStats getSqlResult(GetHelper ctx) throws MetaException { return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, - colNames, useDensityFunctionForNDVEstimation, ndvTuner); + colNames, useDensityFunctionForNDVEstimation, 
ndvTuner, enableBitVector); } @Override protected AggrStats getJdoResult(GetHelper ctx) @@ -7313,11 +7319,13 @@ protected String describeResult() { @Override public Map> getColStatsForTablePartitions(String dbName, String tableName) throws MetaException, NoSuchObjectException { + final boolean enableBitVector = HiveConf.getBoolVar(getConf(), + HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR); return new GetHelper>>(dbName, tableName, true, false) { @Override protected Map> getSqlResult( GetHelper>> ctx) throws MetaException { - return directSql.getColStatsForTablePartitions(dbName, tblName); + return directSql.getColStatsForTablePartitions(dbName, tblName, enableBitVector); } @Override diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index 2dc2804343..d53ea4c5b2 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -76,6 +76,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setLongStats( longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, + longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { @@ -83,6 +84,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDoubleStats( doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, + doubleStats.isSetBitVectors() ? doubleStats.getBitVectors().getBytes() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? 
doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { @@ -92,12 +94,14 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDecimalStats( decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { @@ -111,6 +115,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDateStats( dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, + dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? 
dateStats.getHighValue().getDaysSinceEpoch() : null); } @@ -146,6 +151,9 @@ public static void setFieldsIntoOldStats( if (mStatsObj.getNumDVs() != null) { oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); } + if (mStatsObj.getBitVector() != null) { + oldStatsObj.setBitVector(mStatsObj.getBitVector()); + } if (mStatsObj.getNumFalses() != null) { oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); } @@ -188,6 +196,9 @@ public static void setFieldsIntoOldStats( if (mStatsObj.getNumDVs() != null) { oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); } + if (mStatsObj.getBitVector() != null) { + oldStatsObj.setBitVector(mStatsObj.getBitVector()); + } if (mStatsObj.getNumFalses() != null) { oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); } @@ -200,7 +211,7 @@ public static void setFieldsIntoOldStats( } public static ColumnStatisticsObj getTableColumnStatisticsObj( - MTableColumnStatistics mStatsObj) { + MTableColumnStatistics mStatsObj, boolean enableBitVector) { ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColType(mStatsObj.getColType()); statsObj.setColName(mStatsObj.getColName()); @@ -220,6 +231,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( stringStats.setAvgColLen(mStatsObj.getAvgColLen()); stringStats.setMaxColLen(mStatsObj.getMaxColLen()); stringStats.setNumDVs(mStatsObj.getNumDVs()); + stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); colStatsData.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -241,6 +253,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( longStats.setLowValue(longLowValue); } longStats.setNumDVs(mStatsObj.getNumDVs()); + longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : new String(mStatsObj.getBitVector())); colStatsData.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); @@ -254,6 +267,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( doubleStats.setLowValue(doubleLowValue); } doubleStats.setNumDVs(mStatsObj.getNumDVs()); + doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); colStatsData.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); @@ -267,6 +281,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( decimalStats.setLowValue(createThriftDecimal(decimalLowValue)); } decimalStats.setNumDVs(mStatsObj.getNumDVs()); + decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); colStatsData.setDecimalStats(decimalStats); } else if (colType.equals("date")) { DateColumnStatsData dateStats = new DateColumnStatsData(); @@ -280,6 +295,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( dateStats.setLowValue(new Date(lowValue)); } dateStats.setNumDVs(mStatsObj.getNumDVs()); + dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); colStatsData.setDateStats(dateStats); } statsObj.setStatsData(colStatsData); @@ -323,6 +339,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setLongStats( longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, + longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? 
longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { @@ -330,6 +347,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDoubleStats( doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, + doubleStats.isSetBitVectors() ? doubleStats.getBitVectors().getBytes() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { @@ -339,12 +357,14 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDecimalStats( decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { @@ -358,6 +378,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDateStats( dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, + dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? 
dateStats.getHighValue().getDaysSinceEpoch() : null); } @@ -365,7 +386,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( } public static ColumnStatisticsObj getPartitionColumnStatisticsObj( - MPartitionColumnStatistics mStatsObj) { + MPartitionColumnStatistics mStatsObj, boolean enableBitVector) { ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColType(mStatsObj.getColType()); statsObj.setColName(mStatsObj.getColName()); @@ -385,6 +406,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( stringStats.setAvgColLen(mStatsObj.getAvgColLen()); stringStats.setMaxColLen(mStatsObj.getMaxColLen()); stringStats.setNumDVs(mStatsObj.getNumDVs()); + stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); colStatsData.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -404,6 +426,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( longStats.setLowValue(mStatsObj.getLongLowValue()); } longStats.setNumDVs(mStatsObj.getNumDVs()); + longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); colStatsData.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); @@ -415,6 +438,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); } doubleStats.setNumDVs(mStatsObj.getNumDVs()); + doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : new String(mStatsObj.getBitVector())); colStatsData.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); @@ -426,6 +450,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue())); } decimalStats.setNumDVs(mStatsObj.getNumDVs()); + decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector())); colStatsData.setDecimalStats(decimalStats); } else if (colType.equals("date")) { DateColumnStatsData dateStats = new DateColumnStatsData(); @@ -433,6 +458,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( dateStats.setHighValue(new Date(mStatsObj.getLongHighValue())); dateStats.setLowValue(new Date(mStatsObj.getLongLowValue())); dateStats.setNumDVs(mStatsObj.getNumDVs()); + dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : new String(mStatsObj.getBitVector())); colStatsData.setDateStats(dateStats); } statsObj.setStatsData(colStatsData); @@ -450,10 +476,10 @@ public static ColumnStatisticsDesc getPartitionColumnStatisticsDesc( return statsDesc; } - // SQL + // JAVA public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, - Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException { + Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException { colType = colType.toLowerCase(); if (colType.equals("boolean")) { BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); @@ -468,6 +494,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen)); stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + stringStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -487,6 +514,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow)); } longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + longStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); @@ -498,6 +526,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow)); } 
doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + doubleStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); @@ -509,6 +538,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData decimalStats.setLowValue(createThriftDecimal((String)declow)); } decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + decimalStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setDecimalStats(decimalStats); } else if (colType.equals("date")) { DateColumnStatsData dateStats = new DateColumnStatsData(); @@ -520,10 +550,12 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow))); } dateStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + dateStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector)); data.setDateStats(dateStats); } } + //DB public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses, diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 3ac4fe1604..5d8d51cc27 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -19,6 +19,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -80,8 +81,8 @@ import org.apache.hadoop.hive.metastore.api.UnknownDBException; import 
org.apache.hadoop.hive.metastore.api.UnknownPartitionException; import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -92,6 +93,7 @@ import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; // TODO filter->expr // TODO functionCache @@ -1557,27 +1559,37 @@ public AggrStats get_aggr_stats_for(String dbName, String tblName, List private ColumnStatisticsObj mergeColStatsForPartitions(String dbName, String tblName, List partNames, String colName) throws MetaException { - ColumnStatisticsObj colStats = null; + final boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(), + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); + final double ndvTuner = HiveConf.getFloatVar(getConf(), + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER); + Map> map = new HashMap<>(); + List list = new ArrayList<>(); + boolean areAllPartsFound = true; for (String partName : partNames) { - String colStatsCacheKey = - CacheUtils.buildKey(dbName, tblName, partNameToVals(partName), colName); - ColumnStatisticsObj colStatsForPart = - SharedCache.getCachedPartitionColStats(colStatsCacheKey); - if (colStatsForPart == null) { - // we don't have stats for all the partitions - // logic for extrapolation hasn't been added to CacheStore - // So stop now, and lets fallback to underlying RawStore - return null; - } - if (colStats == null) { - colStats = colStatsForPart; + 
String colStatsCacheKey = CacheUtils.buildKey(dbName, tblName, partNameToVals(partName), + colName); + List singleObj = new ArrayList<>(); + ColumnStatisticsObj colStatsForPart = SharedCache + .getCachedPartitionColStats(colStatsCacheKey); + if (colStatsForPart != null) { + singleObj.add(colStatsForPart); + ColumnStatisticsDesc css = new ColumnStatisticsDesc(false, dbName, tblName); + css.setPartName(partName); + list.add(new ColumnStatistics(css, singleObj)); } else { - ColumnStatsMerger merger = - ColumnStatsMergerFactory.getColumnStatsMerger(colStats, colStatsForPart); - merger.merge(colStats, colStatsForPart); + areAllPartsFound = false; } } - return colStats; + map.put(colName, list); + List colNames = new ArrayList<>(); + colNames.add(colName); + // Note that enableBitVector does not apply here because ColumnStatisticsObj + // itself will tell whether + // bitvector is null or not and aggr logic can automatically apply. + return MetaStoreUtils + .aggrPartitionStats(map, dbName, tblName, partNames, colNames, areAllPartsFound, + useDensityFunctionForNDVEstimation, ndvTuner).iterator().next(); } @Override diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java index d81d612e92..e6c836b183 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.List; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java index e796df2422..a34bc9f38b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.List; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java index 29a05390bf..a52e5e5275 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.List; @@ -27,7 +27,7 @@ public abstract class ColumnStatsAggregator { public boolean useDensityFunctionForNDVEstimation; - + public double ndvTuner; public abstract ColumnStatisticsObj aggregate(String colName, List partNames, List css) throws MetaException; } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java similarity index 89% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java index 568bf0609b..173e06fe8e 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java @@ -17,13 +17,14 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; @@ -34,7 +35,8 @@ private ColumnStatsAggregatorFactory() { } - public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boolean useDensityFunctionForNDVEstimation) { + public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, + boolean useDensityFunctionForNDVEstimation, double ndvTuner) { ColumnStatsAggregator agg; switch (type) { case BOOLEAN_STATS: @@ -43,6 +45,9 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boole case LONG_STATS: agg = new LongColumnStatsAggregator(); break; + case DATE_STATS: + agg = new DateColumnStatsAggregator(); + break; case DOUBLE_STATS: agg = new DoubleColumnStatsAggregator(); break; @@ -59,6 +64,7 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boole throw new RuntimeException("Woh, bad. 
Unknown stats type " + type.toString()); } agg.useDensityFunctionForNDVEstimation = useDensityFunctionForNDVEstimation; + agg.ndvTuner = ndvTuner; return agg; } @@ -76,6 +82,10 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col csd.setLongStats(new LongColumnStatsData()); break; + case DATE_STATS: + csd.setDateStats(new DateColumnStatsData()); + break; + case DOUBLE_STATS: csd.setDoubleStats(new DoubleColumnStatsData()); break; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java new file mode 100644 index 0000000000..5cca7efb50 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -0,0 +1,357 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.aggr; + +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Date; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DateColumnStatsAggregator extends ColumnStatsAggregator implements + IExtrapolatePartStatus { + + private static final Logger LOG = LoggerFactory.getLogger(DateColumnStatsAggregator.class); + + @Override + public ColumnStatisticsObj aggregate(String colName, List partNames, + List css) throws MetaException { + ColumnStatisticsObj statsObj = null; + + // check if all the ColumnStatisticsObjs contain stats and all the ndv are + // bitvectors + boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); + NumDistinctValueEstimator ndvEstimator = null; + String colType = null; + for (ColumnStatistics cs : css) { + if (cs.getStatsObjSize() != 1) { + throw new MetaException( + "The number of columns should be exactly one in aggrStats, but found " + + cs.getStatsObjSize()); + } + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + if (statsObj == null) { + colType = cso.getColType(); + statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso + .getStatsData().getSetField()); + } + if 
(!cso.getStatsData().getDateStats().isSetBitVectors() + || cso.getStatsData().getDateStats().getBitVectors().length() == 0) { + ndvEstimator = null; + break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(cso.getStatsData().getDateStats().getBitVectors()); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } + } + } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); + } + LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); + ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); + if (doAllPartitionContainStats || css.size() < 2) { + DateColumnStatsData aggregateData = null; + long lowerBound = 0; + long higherBound = 0; + double densityAvgSum = 0.0; + for (ColumnStatistics cs : css) { + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + DateColumnStatsData newData = cso.getStatsData().getDateStats(); + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += (diff(newData.getHighValue(), newData.getLowValue())) + / newData.getNumDVs(); + if (ndvEstimator != null) { + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); + } + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData + .setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } + } + if 
(ndvEstimator != null) { + // if all the ColumnStatisticsObjs contain bitvectors, we do not need to + // use uniform distribution assumption because we can merge bitvectors + // to get a good estimation. + LOG.debug("Ndv estimatation using bitvector for " + colName); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } else { + long estimation; + if (useDensityFunctionForNDVEstimation) { + // We have estimation, lowerbound and higherbound. We use estimation + // if it is between lowerbound and higherbound. + double densityAvg = densityAvgSum / partNames.size(); + estimation = (long) (diff(aggregateData.getHighValue(), aggregateData.getLowValue()) / densityAvg); + if (estimation < lowerBound) { + estimation = lowerBound; + } else if (estimation > higherBound) { + estimation = higherBound; + } + } else { + estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); + } + aggregateData.setNumDVs(estimation); + } + columnStatisticsData.setDateStats(aggregateData); + } else { + // we need extrapolation + LOG.debug("start extrapolation for " + colName); + + Map indexMap = new HashMap(); + for (int index = 0; index < partNames.size(); index++) { + indexMap.put(partNames.get(index), index); + } + Map adjustedIndexMap = new HashMap(); + Map adjustedStatsMap = new HashMap(); + // while we scan the css, we also get the densityAvg, lowerbound and + // higerbound when useDensityFunctionForNDVEstimation is true. + double densityAvgSum = 0.0; + if (ndvEstimator == null) { + // if not every partition uses bitvector for ndv, we just fall back to + // the traditional extrapolation methods. 
+ for (ColumnStatistics cs : css) { + String partName = cs.getStatsDesc().getPartName(); + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + DateColumnStatsData newData = cso.getStatsData().getDateStats(); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += diff(newData.getHighValue(), newData.getLowValue()) / newData.getNumDVs(); + } + adjustedIndexMap.put(partName, (double) indexMap.get(partName)); + adjustedStatsMap.put(partName, cso.getStatsData()); + } + } else { + // we first merge all the adjacent bitvectors that we could merge and + // derive new partition names and index. + StringBuilder pseudoPartName = new StringBuilder(); + double pseudoIndexSum = 0; + int length = 0; + int curIndex = -1; + DateColumnStatsData aggregateData = null; + for (ColumnStatistics cs : css) { + String partName = cs.getStatsDesc().getPartName(); + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + DateColumnStatsData newData = cso.getStatsData().getDateStats(); + // newData.isSetBitVectors() should be true for sure because we + // already checked it before. + if (indexMap.get(partName) != curIndex) { + // There is bitvector, but it is not adjacent to the previous ones. 
+ if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setDateStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += diff(aggregateData.getHighValue(), aggregateData.getLowValue()) + / aggregateData.getNumDVs(); + } + // reset everything + pseudoPartName = new StringBuilder(); + pseudoIndexSum = 0; + length = 0; + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); + } + aggregateData = null; + } + curIndex = indexMap.get(partName); + pseudoPartName.append(partName); + pseudoIndexSum += curIndex; + length++; + curIndex++; + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + } + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); + } + if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setDateStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += diff(aggregateData.getHighValue(), aggregateData.getLowValue()) + / aggregateData.getNumDVs(); + } + } + } + extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap, + adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); + } + 
statsObj.setStatsData(columnStatisticsData); + return statsObj; + } + + private long diff(Date d1, Date d2) { + return d1.getDaysSinceEpoch() - d2.getDaysSinceEpoch(); + } + + private Date min(Date d1, Date d2) { + return d1.compareTo(d2) < 0 ? d1 : d2; + } + + private Date max(Date d1, Date d2) { + return d1.compareTo(d2) < 0 ? d2 : d1; + } + + @Override + public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, + int numPartsWithStats, Map adjustedIndexMap, + Map adjustedStatsMap, double densityAvg) { + int rightBorderInd = numParts; + DateColumnStatsData extrapolateDateData = new DateColumnStatsData(); + Map extractedAdjustedStatsMap = new HashMap<>(); + for (Map.Entry entry : adjustedStatsMap.entrySet()) { + extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDateStats()); + } + List> list = new LinkedList>( + extractedAdjustedStatsMap.entrySet()); + // get the lowValue + Collections.sort(list, new Comparator>() { + public int compare(Map.Entry o1, + Map.Entry o2) { + return diff(o1.getValue().getLowValue(), o2.getValue().getLowValue()) < 0 ? -1 : 1; + } + }); + double minInd = adjustedIndexMap.get(list.get(0).getKey()); + double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + long lowValue = 0; + long min = list.get(0).getValue().getLowValue().getDaysSinceEpoch(); + long max = list.get(list.size() - 1).getValue().getLowValue().getDaysSinceEpoch(); + if (minInd == maxInd) { + lowValue = min; + } else if (minInd < maxInd) { + // left border is the min + lowValue = (long) (max - (max - min) * maxInd / (maxInd - minInd)); + } else { + // right border is the min + lowValue = (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); + } + + // get the highValue + Collections.sort(list, new Comparator>() { + public int compare(Map.Entry o1, + Map.Entry o2) { + return diff(o1.getValue().getHighValue(), o2.getValue().getHighValue()) < 0 ? 
-1 : 1; + } + }); + minInd = adjustedIndexMap.get(list.get(0).getKey()); + maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + long highValue = 0; + min = list.get(0).getValue().getHighValue().getDaysSinceEpoch(); + max = list.get(list.size() - 1).getValue().getHighValue().getDaysSinceEpoch(); + if (minInd == maxInd) { + highValue = min; + } else if (minInd < maxInd) { + // right border is the max + highValue = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); + } else { + // left border is the max + highValue = (long) (min + (max - min) * minInd / (minInd - maxInd)); + } + + // get the #nulls + long numNulls = 0; + for (Map.Entry entry : extractedAdjustedStatsMap.entrySet()) { + numNulls += entry.getValue().getNumNulls(); + } + // we scale up sumNulls based on the number of partitions + numNulls = numNulls * numParts / numPartsWithStats; + + // get the ndv + long ndv = 0; + Collections.sort(list, new Comparator>() { + public int compare(Map.Entry o1, + Map.Entry o2) { + return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? 
-1 : 1; + } + }); + long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); + long higherBound = 0; + for (Map.Entry entry : list) { + higherBound += entry.getValue().getNumDVs(); + } + if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { + ndv = (long) ((highValue - lowValue) / densityAvg); + if (ndv < lowerBound) { + ndv = lowerBound; + } else if (ndv > higherBound) { + ndv = higherBound; + } + } else { + minInd = adjustedIndexMap.get(list.get(0).getKey()); + maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + min = list.get(0).getValue().getNumDVs(); + max = list.get(list.size() - 1).getValue().getNumDVs(); + if (minInd == maxInd) { + ndv = min; + } else if (minInd < maxInd) { + // right border is the max + ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); + } else { + // left border is the max + ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); + } + } + extrapolateDateData.setLowValue(new Date(lowValue)); + extrapolateDateData.setHighValue(new Date(highValue)); + extrapolateDateData.setNumNulls(numNulls); + extrapolateDateData.setNumDVs(ndv); + extrapolateData.setDateStats(extrapolateDateData); + } +} diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java index 8eb64e0143..81f27d2458 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Collections; import java.util.Comparator; @@ -35,9 +35,13 @@ import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.hbase.HBaseUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implements IExtrapolatePartStatus { + + private static final Logger LOG = LoggerFactory.getLogger(DecimalColumnStatsAggregator.class); @Override public ColumnStatisticsObj aggregate(String colName, List partNames, @@ -47,6 +51,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { @@ -85,6 +90,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, ndvEstimator = NumDistinctValueEstimatorFactory .getEmptyNumDistinctValueEstimator(ndvEstimator); } + LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { DecimalColumnStatsData aggregateData = null; @@ -94,12 +100,10 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats(); - if (useDensityFunctionForNDVEstimation) { - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += 
newData.getNumDVs(); - densityAvgSum += (HBaseUtils.getDoubleValue(newData.getHighValue()) - HBaseUtils - .getDoubleValue(newData.getLowValue())) / newData.getNumDVs(); - } + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += (HBaseUtils.getDoubleValue(newData.getHighValue()) - HBaseUtils + .getDoubleValue(newData.getLowValue())) / newData.getNumDVs(); if (ndvEstimator != null) { ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory .getNumDistinctValueEstimator(newData.getBitVectors())); @@ -127,30 +131,30 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // if all the ColumnStatisticsObjs contain bitvectors, we do not need to // use uniform distribution assumption because we can merge bitvectors // to get a good estimation. + LOG.debug("Ndv estimatation using bitvector for " + colName); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); } else { + long estimation; if (useDensityFunctionForNDVEstimation) { // We have estimation, lowerbound and higherbound. We use estimation // if it is between lowerbound and higherbound. double densityAvg = densityAvgSum / partNames.size(); - long estimation = (long) ((HBaseUtils.getDoubleValue(aggregateData.getHighValue()) - HBaseUtils + estimation = (long) ((HBaseUtils.getDoubleValue(aggregateData.getHighValue()) - HBaseUtils .getDoubleValue(aggregateData.getLowValue())) / densityAvg); if (estimation < lowerBound) { - aggregateData.setNumDVs(lowerBound); + estimation = lowerBound; } else if (estimation > higherBound) { - aggregateData.setNumDVs(higherBound); - } else { - aggregateData.setNumDVs(estimation); + estimation = higherBound; } } else { - // Without useDensityFunctionForNDVEstimation, we just use the - // default one, which is the max of all the partitions and it is - // already done. 
+ estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); } + aggregateData.setNumDVs(estimation); } columnStatisticsData.setDecimalStats(aggregateData); } else { // we need extrapolation + LOG.debug("start extrapolation for " + colName); Map indexMap = new HashMap(); for (int index = 0; index < partNames.size(); index++) { indexMap.put(partNames.get(index), index); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java index b6b86123b2..b7cd33a3dc 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Collections; import java.util.Comparator; @@ -33,10 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implements IExtrapolatePartStatus { + private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class); + @Override public ColumnStatisticsObj aggregate(String colName, List partNames, List css) throws MetaException { @@ -45,6 +49,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { @@ -83,6 +88,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, ndvEstimator = NumDistinctValueEstimatorFactory .getEmptyNumDistinctValueEstimator(ndvEstimator); } + LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { DoubleColumnStatsData aggregateData = null; @@ -92,11 +98,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); - if (useDensityFunctionForNDVEstimation) { - lowerBound = Math.max(lowerBound, 
newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); if (ndvEstimator != null) { ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory .getNumDistinctValueEstimator(newData.getBitVectors())); @@ -115,29 +119,29 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // if all the ColumnStatisticsObjs contain bitvectors, we do not need to // use uniform distribution assumption because we can merge bitvectors // to get a good estimation. + LOG.debug("Ndv estimatation using bitvector for " + colName); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); } else { + long estimation; if (useDensityFunctionForNDVEstimation) { // We have estimation, lowerbound and higherbound. We use estimation // if it is between lowerbound and higherbound. double densityAvg = densityAvgSum / partNames.size(); - long estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); + estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); if (estimation < lowerBound) { - aggregateData.setNumDVs(lowerBound); + estimation = lowerBound; } else if (estimation > higherBound) { - aggregateData.setNumDVs(higherBound); - } else { - aggregateData.setNumDVs(estimation); + estimation = higherBound; } } else { - // Without useDensityFunctionForNDVEstimation, we just use the - // default one, which is the max of all the partitions and it is - // already done. 
+ estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); } + aggregateData.setNumDVs(estimation); } columnStatisticsData.setDoubleStats(aggregateData); } else { // we need extrapolation + LOG.debug("start extrapolation for " + colName); Map indexMap = new HashMap(); for (int index = 0; index < partNames.size(); index++) { indexMap.put(partNames.get(index), index); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java similarity index 96% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java index af75bced72..acf679e1c3 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Map; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java index 2da6f60167..01b3664253 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Collections; import java.util.Comparator; @@ -33,10 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class LongColumnStatsAggregator extends ColumnStatsAggregator implements IExtrapolatePartStatus { + private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class); + @Override public ColumnStatisticsObj aggregate(String colName, List partNames, List css) throws MetaException { @@ -45,6 +49,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); NumDistinctValueEstimator 
ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { @@ -83,6 +88,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, ndvEstimator = NumDistinctValueEstimatorFactory .getEmptyNumDistinctValueEstimator(ndvEstimator); } + LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { LongColumnStatsData aggregateData = null; @@ -92,11 +98,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); LongColumnStatsData newData = cso.getStatsData().getLongStats(); - if (useDensityFunctionForNDVEstimation) { - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); if (ndvEstimator != null) { ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory .getNumDistinctValueEstimator(newData.getBitVectors())); @@ -115,29 +119,30 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // if all the ColumnStatisticsObjs contain bitvectors, we do not need to // use uniform distribution assumption because we can merge bitvectors // to get a good estimation. + LOG.debug("Ndv estimatation using bitvector for " + colName); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); } else { + long estimation; if (useDensityFunctionForNDVEstimation) { // We have estimation, lowerbound and higherbound. We use estimation // if it is between lowerbound and higherbound. 
double densityAvg = densityAvgSum / partNames.size();
-          long estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg);
+          estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg);
           if (estimation < lowerBound) {
-            aggregateData.setNumDVs(lowerBound);
+            estimation = lowerBound;
           } else if (estimation > higherBound) {
-            aggregateData.setNumDVs(higherBound);
-          } else {
-            aggregateData.setNumDVs(estimation);
+            estimation = higherBound;
           }
         } else {
-          // Without useDensityFunctionForNDVEstimation, we just use the
-          // default one, which is the max of all the partitions and it is
-          // already done.
+          estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
         }
+        aggregateData.setNumDVs(estimation);
       }
       columnStatisticsData.setLongStats(aggregateData);
     } else {
       // we need extrapolation
+      LOG.debug("start extrapolation for " + colName);
+
       Map indexMap = new HashMap();
       for (int index = 0; index < partNames.size(); index++) {
         indexMap.put(partNames.get(index), index);
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
new file mode 100644
index 0000000000..97744d9ca6
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
@@ -0,0 +1,332 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Aggregates the string column statistics of several partitions into a single
+ * {@link ColumnStatisticsObj}.
+ *
+ * If statistics were supplied for every requested partition (or fewer than two
+ * statistics objects were supplied) the values are combined directly; otherwise
+ * the result is extrapolated over the whole partition range.
+ */
+public class StringColumnStatsAggregator extends ColumnStatsAggregator implements
+    IExtrapolatePartStatus {
+
+  private static final Logger LOG = LoggerFactory.getLogger(StringColumnStatsAggregator.class);
+
+  /**
+   * Combines the per-partition statistics of one string column.
+   *
+   * @param colName name of the column being aggregated
+   * @param partNames names of all partitions covered by the request
+   * @param css statistics that are available; every entry must hold exactly
+   *          one column statistics object
+   * @return the aggregated statistics object for the column
+   * @throws MetaException if an entry of {@code css} does not hold exactly one
+   *           column statistics object
+   */
+  @Override
+  public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
+      List<ColumnStatistics> css) throws MetaException {
+    ColumnStatisticsObj statsObj = null;
+
+    // check if all the ColumnStatisticsObjs contain stats and all the ndv are
+    // bitvectors. Only when both of the conditions are true, we merge bit
+    // vectors. Otherwise, just use the maximum function.
+    boolean doAllPartitionContainStats = partNames.size() == css.size();
+    LOG.debug("doAllPartitionContainStats for {} is {}", colName, doAllPartitionContainStats);
+    NumDistinctValueEstimator ndvEstimator = null;
+    String colType = null;
+    for (ColumnStatistics cs : css) {
+      if (cs.getStatsObjSize() != 1) {
+        throw new MetaException(
+            "The number of columns should be exactly one in aggrStats, but found "
+                + cs.getStatsObjSize());
+      }
+      ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+      if (statsObj == null) {
+        colType = cso.getColType();
+        statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
+            .getStatsData().getSetField());
+      }
+      StringColumnStatsData stringStats = cso.getStatsData().getStringStats();
+      if (!stringStats.isSetBitVectors() || stringStats.getBitVectors().length() == 0) {
+        // a partition without a bitvector rules out merging altogether
+        ndvEstimator = null;
+        break;
+      }
+      // check if all of the bit vectors can merge
+      NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
+          .getNumDistinctValueEstimator(stringStats.getBitVectors());
+      if (ndvEstimator == null) {
+        ndvEstimator = estimator;
+      } else if (!ndvEstimator.canMerge(estimator)) {
+        ndvEstimator = null;
+        break;
+      }
+    }
+    if (ndvEstimator != null) {
+      // start from an empty estimator; every bitvector is merged in below
+      ndvEstimator = NumDistinctValueEstimatorFactory
+          .getEmptyNumDistinctValueEstimator(ndvEstimator);
+    }
+    LOG.debug("all of the bit vectors can merge for {} is {}", colName, ndvEstimator != null);
+    ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
+    if (doAllPartitionContainStats || css.size() < 2) {
+      StringColumnStatsData aggregateData = null;
+      for (ColumnStatistics cs : css) {
+        ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+        StringColumnStatsData newData = cso.getStatsData().getStringStats();
+        if (ndvEstimator != null) {
+          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+              .getNumDistinctValueEstimator(newData.getBitVectors()));
+        }
+        if (aggregateData == null) {
+          aggregateData = newData.deepCopy();
+        } else {
+          aggregateData
+              .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
+          aggregateData
+              .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
+          aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+          aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+        }
+      }
+      if (ndvEstimator != null) {
+        // if all the ColumnStatisticsObjs contain bitvectors, we do not need to
+        // use uniform distribution assumption because we can merge bitvectors
+        // to get a good estimation.
+        LOG.debug("Ndv estimatation using bitvector for {}", colName);
+        aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+      }
+      // otherwise aggregateData already has the ndv of the max of all
+      columnStatisticsData.setStringStats(aggregateData);
+    } else {
+      // we need extrapolation
+      LOG.debug("start extrapolation for {}", colName);
+
+      Map<String, Integer> indexMap = new HashMap<>();
+      for (int index = 0; index < partNames.size(); index++) {
+        indexMap.put(partNames.get(index), index);
+      }
+      Map<String, Double> adjustedIndexMap = new HashMap<>();
+      Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<>();
+      if (ndvEstimator == null) {
+        // if not every partition uses bitvector for ndv, we just fall back to
+        // the traditional extrapolation methods.
+        for (ColumnStatistics cs : css) {
+          String partName = cs.getStatsDesc().getPartName();
+          ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+          adjustedIndexMap.put(partName, (double) indexMap.get(partName));
+          adjustedStatsMap.put(partName, cso.getStatsData());
+        }
+      } else {
+        // we first merge all the adjacent bitvectors that we could merge and
+        // derive new partition names and index.
+        StringBuilder pseudoPartName = new StringBuilder();
+        double pseudoIndexSum = 0;
+        int length = 0;
+        int curIndex = -1;
+        StringColumnStatsData aggregateData = null;
+        for (ColumnStatistics cs : css) {
+          String partName = cs.getStatsDesc().getPartName();
+          ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+          StringColumnStatsData newData = cso.getStatsData().getStringStats();
+          // newData.isSetBitVectors() should be true for sure because we
+          // already checked it before.
+          if (indexMap.get(partName) != curIndex) {
+            // There is bitvector, but it is not adjacent to the previous ones.
+            if (length > 0) {
+              // we have to set ndv
+              adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+              aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+              ColumnStatisticsData csd = new ColumnStatisticsData();
+              csd.setStringStats(aggregateData);
+              adjustedStatsMap.put(pseudoPartName.toString(), csd);
+              // reset everything
+              pseudoPartName = new StringBuilder();
+              pseudoIndexSum = 0;
+              length = 0;
+              ndvEstimator = NumDistinctValueEstimatorFactory
+                  .getEmptyNumDistinctValueEstimator(ndvEstimator);
+            }
+            aggregateData = null;
+          }
+          curIndex = indexMap.get(partName);
+          pseudoPartName.append(partName);
+          pseudoIndexSum += curIndex;
+          length++;
+          curIndex++;
+          if (aggregateData == null) {
+            aggregateData = newData.deepCopy();
+          } else {
+            // use max for avgColLen, as the non-extrapolated path above does
+            aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(),
+                newData.getAvgColLen()));
+            aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(),
+                newData.getMaxColLen()));
+            aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+          }
+          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+              .getNumDistinctValueEstimator(newData.getBitVectors()));
+        }
+        if (length > 0) {
+          // we have to set ndv
+          adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+          aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+          ColumnStatisticsData csd = new ColumnStatisticsData();
+          csd.setStringStats(aggregateData);
+          adjustedStatsMap.put(pseudoPartName.toString(), csd);
+        }
+      }
+      extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
+          adjustedStatsMap, -1);
+    }
+    statsObj.setStatsData(columnStatisticsData);
+    return statsObj;
+  }
+
+  /**
+   * Fills {@code extrapolateData} with string statistics estimated for all
+   * {@code numParts} partitions from the partitions that do have statistics.
+   *
+   * @param extrapolateData receives the extrapolated string statistics
+   * @param numParts total number of partitions
+   * @param numPartsWithStats number of partitions that have statistics
+   * @param adjustedIndexMap (pseudo) partition name to its position
+   * @param adjustedStatsMap (pseudo) partition name to its statistics
+   * @param densityAvg not used for string columns
+   */
+  @Override
+  public void extrapolate(ColumnStatisticsData extrapolateData, int numParts,
+      int numPartsWithStats, Map<String, Double> adjustedIndexMap,
+      Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
+    int rightBorderInd = numParts;
+    Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
+    for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
+      extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats());
+    }
+    List<Map.Entry<String, StringColumnStatsData>> list =
+        new ArrayList<Map.Entry<String, StringColumnStatsData>>(
+            extractedAdjustedStatsMap.entrySet());
+
+    // get the avgLen
+    Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, StringColumnStatsData> o1,
+          Map.Entry<String, StringColumnStatsData> o2) {
+        return Double.compare(o1.getValue().getAvgColLen(), o2.getValue().getAvgColLen());
+      }
+    });
+    double avgColLen = linearEstimate(
+        list.get(0).getValue().getAvgColLen(),
+        list.get(list.size() - 1).getValue().getAvgColLen(),
+        adjustedIndexMap.get(list.get(0).getKey()),
+        adjustedIndexMap.get(list.get(list.size() - 1).getKey()), rightBorderInd);
+
+    // get the maxLen
+    Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, StringColumnStatsData> o1,
+          Map.Entry<String, StringColumnStatsData> o2) {
+        return Long.compare(o1.getValue().getMaxColLen(), o2.getValue().getMaxColLen());
+      }
+    });
+    // interpolate over the max lengths themselves, not the average lengths
+    double maxColLen = linearEstimate(
+        list.get(0).getValue().getMaxColLen(),
+        list.get(list.size() - 1).getValue().getMaxColLen(),
+        adjustedIndexMap.get(list.get(0).getKey()),
+        adjustedIndexMap.get(list.get(list.size() - 1).getKey()), rightBorderInd);
+
+    // get the #nulls
+    long numNulls = 0;
+    for (StringColumnStatsData stats : extractedAdjustedStatsMap.values()) {
+      numNulls += stats.getNumNulls();
+    }
+    // we scale up sumNulls based on the number of partitions
+    numNulls = numNulls * numParts / numPartsWithStats;
+
+    // get the ndv
+    Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, StringColumnStatsData> o1,
+          Map.Entry<String, StringColumnStatsData> o2) {
+        return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
+      }
+    });
+    long ndv = (long) linearEstimate(
+        list.get(0).getValue().getNumDVs(),
+        list.get(list.size() - 1).getValue().getNumDVs(),
+        adjustedIndexMap.get(list.get(0).getKey()),
+        adjustedIndexMap.get(list.get(list.size() - 1).getKey()), rightBorderInd);
+
+    StringColumnStatsData extrapolateStringData = new StringColumnStatsData();
+    extrapolateStringData.setAvgColLen(avgColLen);
+    extrapolateStringData.setMaxColLen((long) maxColLen);
+    extrapolateStringData.setNumNulls(numNulls);
+    extrapolateStringData.setNumDVs(ndv);
+    extrapolateData.setStringStats(extrapolateStringData);
+  }
+
+  /**
+   * Extends the straight line through {@code (minInd, min)} and
+   * {@code (maxInd, max)} to the border of the partition range on the side of
+   * the larger value.
+   *
+   * @param min smallest observed value
+   * @param max largest observed value
+   * @param minInd position of the partition holding {@code min}
+   * @param maxInd position of the partition holding {@code max}
+   * @param rightBorderInd position of the right border of the partition range
+   * @return the extrapolated value, or {@code min} if both positions coincide
+   */
+  private static double linearEstimate(double min, double max, double minInd, double maxInd,
+      int rightBorderInd) {
+    if (minInd == maxInd) {
+      return min;
+    }
+    if (minInd < maxInd) {
+      // right border is the max
+      return min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd);
+    }
+    // left border is the max
+    return min + (max - min) * minInd / (minInd - maxInd);
+  }
+
+}
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
similarity index 96%
rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
index af0669eb65..4c2d1bc602 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
@@ -17,7 +17,7 @@
  * under the License.
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java similarity index 96% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java index 33ff6a19f5..8e5015323f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java similarity index 95% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java index d3051a2b00..474d4ddcd1 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.slf4j.Logger; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java similarity index 98% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java index c013ba5c5d..0ce1847d1c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java similarity index 98% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java index e899bfe85f..2542a00d36 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java similarity index 98% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java index 4099ffcace..4e8e129758 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java index 1691fc97df..4ef5c39d1c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java index 361af350fe..acf7f03c72 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java index 8e28f907ee..b3cd33c671 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the 
License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java index 0e119896a5..78a962a0e6 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java @@ -32,8 +32,8 @@ import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregator; -import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregatorFactory; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; import java.io.IOException; import java.security.MessageDigest; @@ -84,7 +84,10 @@ private StatsCache(final Configuration conf) { .build(new CacheLoader() { @Override public AggrStats load(StatsCacheKey key) throws Exception { - boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); + boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf, + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); + double ndvTuner = HiveConf.getFloatVar(conf, + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER); HBaseReadWrite hrw = HBaseReadWrite.getInstance(); AggrStats aggrStats = hrw.getAggregatedStats(key.hashed); if (aggrStats == null) { @@ -100,7 +103,7 @@ public AggrStats load(StatsCacheKey key) throws Exception { if (aggregator == 
null) { aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css.iterator() .next().getStatsObj().iterator().next().getStatsData().getSetField(), - useDensityFunctionForNDVEstimation); + useDensityFunctionForNDVEstimation, ndvTuner); } ColumnStatisticsObj statsObj = aggregator .aggregate(key.colName, key.partNames, css); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java deleted file mode 100644 index 83c6c54fd2..0000000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java +++ /dev/null @@ -1,122 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.stats; - -import java.util.List; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; - -public class StringColumnStatsAggregator extends ColumnStatsAggregator { - - @Override - public ColumnStatisticsObj aggregate(String colName, List partNames, - List css) throws MetaException { - ColumnStatisticsObj statsObj = null; - - // check if all the ColumnStatisticsObjs contain stats and all the ndv are - // bitvectors. Only when both of the conditions are true, we merge bit - // vectors. Otherwise, just use the maximum function. 
- boolean doAllPartitionContainStats = partNames.size() == css.size(); - NumDistinctValueEstimator ndvEstimator = null; - String colType = null; - for (ColumnStatistics cs : css) { - if (cs.getStatsObjSize() != 1) { - throw new MetaException( - "The number of columns should be exactly one in aggrStats, but found " - + cs.getStatsObjSize()); - } - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - if (statsObj == null) { - colType = cso.getColType(); - statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso - .getStatsData().getSetField()); - } - if (!cso.getStatsData().getStringStats().isSetBitVectors() - || cso.getStatsData().getStringStats().getBitVectors().length() == 0) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors()); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); - } - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats && ndvEstimator!=null) { - StringColumnStatsData aggregateData = null; - for (ColumnStatistics cs : css) { - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsData newData = cso.getStatsData().getStringStats(); - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData - .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData - 
.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - } - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - columnStatisticsData.setStringStats(aggregateData); - } else { - StringColumnStatsData aggregateData = null; - for (ColumnStatistics cs : css) { - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsData newData = cso.getStatsData().getStringStats(); - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData - .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData - .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - columnStatisticsData.setStringStats(aggregateData); - } - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - -} diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index 2967a60fae..20129bb312 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -48,6 +48,7 @@ private String decimalHighValue; private Long numNulls; private Long numDVs; + private byte[] bitVector; private Double avgColLen; private Long maxColLen; private Long numTrues; @@ -166,31 +167,35 @@ public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numNulls = numNulls; } - public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, byte[] 
bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, byte[] bitVector, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.doubleLowValue = lowValue; this.doubleHighValue = highValue; } public void setDecimalStats( - Long numNulls, Long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, byte[] bitVector, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) { + public void setStringStats(Long numNulls, Long numNDVs, byte[] bitVector, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } @@ -201,9 +206,10 @@ public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.avgColLen = avgColLen; } - public void setDateStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } @@ -255,4 +261,12 @@ public String getDecimalHighValue() { public void setDecimalHighValue(String decimalHighValue) { this.decimalHighValue = decimalHighValue; } + + public byte[] getBitVector() { + return bitVector; + } + + public void setBitVector(byte[] bitVector) { + this.bitVector = bitVector; + } } diff 
--git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 132f7a137b..6cfaca38af 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -46,6 +46,7 @@ private String decimalHighValue; private Long numNulls; private Long numDVs; + private byte[] bitVector; private Double avgColLen; private Long maxColLen; private Long numTrues; @@ -156,31 +157,35 @@ public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numNulls = numNulls; } - public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, byte[] bitVector, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.doubleLowValue = lowValue; this.doubleHighValue = highValue; } public void setDecimalStats( - Long numNulls, Long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, byte[] bitVector, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) { + public void setStringStats(Long numNulls, Long numNDVs, byte[] bitVector, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = 
numNDVs; + this.bitVector = bitVector; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } @@ -191,9 +196,10 @@ public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.avgColLen = avgColLen; } - public void setDateStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } @@ -246,4 +252,12 @@ public String getDecimalHighValue() { public void setDecimalHighValue(String decimalHighValue) { this.decimalHighValue = decimalHighValue; } + + public byte[] getBitVector() { + return bitVector; + } + + public void setBitVector(byte[] bitVector) { + this.bitVector = bitVector; + } } diff --git a/metastore/src/model/package.jdo b/metastore/src/model/package.jdo index 9c4bc219f2..570fd44c21 100644 --- a/metastore/src/model/package.jdo +++ b/metastore/src/model/package.jdo @@ -879,6 +879,9 @@ + + + @@ -943,6 +946,9 @@ + + + diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java new file mode 100644 index 0000000000..54828f2289 --- /dev/null +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.AggrStats; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; 
+import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestOldSchema { + private ObjectStore store = null; + + private static final Logger LOG = LoggerFactory.getLogger(TestOldSchema.class.getName()); + + public static class MockPartitionExpressionProxy implements PartitionExpressionProxy { + @Override + public String convertExprToFilter(byte[] expr) throws MetaException { + return null; + } + + @Override + public boolean filterPartitionsByExpr(List partColumnNames, + List partColumnTypeInfos, byte[] expr, String defaultPartitionName, + List partitionNames) throws MetaException { + return false; + } + + @Override + public FileMetadataExprType getMetadataType(String inputFormat) { + return null; + } + + @Override + public SearchArgument createSarg(byte[] expr) { + return null; + } + + @Override + public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) { + return null; + } + } + + String bitVectors[] = new String[2]; + + @Before + public void setUp() throws Exception { + HiveConf conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS, + MockPartitionExpressionProxy.class.getName()); + conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR, false); + + store = new ObjectStore(); + store.setConf(conf); + dropAllStoreObjects(store); + + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addLong(1); + bitVectors[1] = hll.serialize(); + hll = HyperLogLog.builder().build(); + hll.addLong(2); + hll.addLong(3); + hll.addLong(3); + hll.addLong(4); + bitVectors[0] = hll.serialize(); + } + + @After + public void tearDown() { + } + + /** + * Tests 
partition operations + * + * @throws Exception + */ + @Test + public void testPartitionOps() throws Exception { + String dbName = "default"; + String tableName = "snp"; + Database db1 = new Database(dbName, "description", "locationurl", null); + store.createDatabase(db1); + long now = System.currentTimeMillis(); + List cols = new ArrayList<>(); + cols.add(new FieldSchema("col1", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections. emptyMap()); + List partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections. emptyMap(), null, null, null); + store.createTable(table); + + Deadline.startTimer("getPartition"); + for (int i = 0; i < 10; i++) { + List partVal = new ArrayList<>(); + partVal.add(String.valueOf(i)); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections. 
emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + String.valueOf(i)); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1"); + obj.setColType("bigint"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors[0]); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col1", cso.getColName()); + Assert.assertEquals("bigint", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1009, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(91, lcsd.getNumDVs()); + } + }; + List partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col1")); + statChecker.checkStats(aggrStats); + + } + + private static interface Checker { + void checkStats(AggrStats aggrStats) throws Exception; + } + + public static void dropAllStoreObjects(RawStore store) throws MetaException, + InvalidObjectException, InvalidInputException { + try { + Deadline.registerIfNot(100000); + 
Deadline.startTimer("getPartition"); + List dbs = store.getAllDatabases(); + for (int i = 0; i < dbs.size(); i++) { + String db = dbs.get(i); + List tbls = store.getAllTables(db); + for (String tbl : tbls) { + List parts = store.getPartitions(db, tbl, 100); + for (Partition part : parts) { + store.dropPartition(db, tbl, part.getValues()); + } + store.dropTable(db, tbl); + } + store.dropDatabase(db); + } + } catch (NoSuchObjectException e) { + } + } + +} diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java index 1fa9447145..e31dad397b 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java @@ -23,6 +23,8 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.ObjectStore; import org.apache.hadoop.hive.metastore.TableType; @@ -740,4 +742,158 @@ public void testAggrStatsRepeatedRead() throws Exception { aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames); Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100); } + + @Test + public void testPartitionAggrStats() throws Exception { + String dbName = "testTableColStatsOps1"; + String tblName = "tbl1"; + String colName = "f1"; + + Database db = new Database(dbName, null, "some_location", null); + cachedStore.createDatabase(db); + + List cols = new ArrayList(); + cols.add(new FieldSchema(colName, "int", null)); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("col", "int", null)); + StorageDescriptor sd = + new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", 
"seriallib", new HashMap()), + null, null, null); + + Table tbl = + new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap(), + null, null, TableType.MANAGED_TABLE.toString()); + cachedStore.createTable(tbl); + + List partVals1 = new ArrayList(); + partVals1.add("1"); + List partVals2 = new ArrayList(); + partVals2.add("2"); + + Partition ptn1 = + new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap()); + cachedStore.addPartition(ptn1); + Partition ptn2 = + new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap()); + cachedStore.addPartition(ptn2); + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName); + statsDesc.setPartName("col"); + List colStatObjs = new ArrayList(); + + ColumnStatisticsData data = new ColumnStatisticsData(); + ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data); + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setLowValue(0); + longStats.setHighValue(100); + longStats.setNumNulls(50); + longStats.setNumDVs(30); + data.setLongStats(longStats); + colStatObjs.add(colStats); + + stats.setStatsDesc(statsDesc); + stats.setStatsObj(colStatObjs); + + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1); + + longStats.setNumDVs(40); + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2); + + List colNames = new ArrayList(); + colNames.add(colName); + List aggrPartVals = new ArrayList(); + aggrPartVals.add("1"); + aggrPartVals.add("2"); + AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames); + Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100); + Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40); + aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames); + 
Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100); + Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40); + } + + @Test + public void testPartitionAggrStatsBitVector() throws Exception { + String dbName = "testTableColStatsOps2"; + String tblName = "tbl2"; + String colName = "f1"; + + Database db = new Database(dbName, null, "some_location", null); + cachedStore.createDatabase(db); + + List cols = new ArrayList(); + cols.add(new FieldSchema(colName, "int", null)); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("col", "int", null)); + StorageDescriptor sd = + new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", "seriallib", new HashMap()), + null, null, null); + + Table tbl = + new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap(), + null, null, TableType.MANAGED_TABLE.toString()); + cachedStore.createTable(tbl); + + List partVals1 = new ArrayList(); + partVals1.add("1"); + List partVals2 = new ArrayList(); + partVals2.add("2"); + + Partition ptn1 = + new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap()); + cachedStore.addPartition(ptn1); + Partition ptn2 = + new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap()); + cachedStore.addPartition(ptn2); + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName); + statsDesc.setPartName("col"); + List colStatObjs = new ArrayList(); + + ColumnStatisticsData data = new ColumnStatisticsData(); + ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data); + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setLowValue(0); + longStats.setHighValue(100); + longStats.setNumNulls(50); + longStats.setNumDVs(30); + + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addLong(1); + hll.addLong(2); + hll.addLong(3); + 
longStats.setBitVectors(hll.serialize()); + + data.setLongStats(longStats); + colStatObjs.add(colStats); + + stats.setStatsDesc(statsDesc); + stats.setStatsObj(colStatObjs); + + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1); + + longStats.setNumDVs(40); + hll = HyperLogLog.builder().build(); + hll.addLong(2); + hll.addLong(3); + hll.addLong(4); + hll.addLong(5); + longStats.setBitVectors(hll.serialize()); + + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2); + + List colNames = new ArrayList(); + colNames.add(colName); + List aggrPartVals = new ArrayList(); + aggrPartVals.add("1"); + aggrPartVals.add("2"); + AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames); + Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100); + Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5); + aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames); + Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100); + Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5); + } } diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java index ecc99c3300..9cf1fb8986 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -115,7 +116,11 @@ public void allPartitions() throws Exception { dcsd.setLowValue(-20.1234213423); dcsd.setNumNulls(30); dcsd.setNumDVs(12342); - dcsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}"); + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addDouble(1); + hll.addDouble(2); + hll.addDouble(3); + dcsd.setBitVectors(hll.serialize()); data.setDoubleStats(dcsd); obj.setStatsData(data); cs.addToStatsObj(obj); @@ -135,7 +140,11 @@ public void allPartitions() throws Exception { dcsd.setLowValue(-20.1234213423); dcsd.setNumNulls(30); dcsd.setNumDVs(12342); - dcsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}"); + hll = HyperLogLog.builder().build(); + hll.addDouble(3); + hll.addDouble(4); + hll.addDouble(5); + dcsd.setBitVectors(hll.serialize()); data.setDoubleStats(dcsd); obj.setStatsData(data); cs.addToStatsObj(obj); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java index 99ce96ca0d..4d868b0146 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.api.AggrStats; @@ -62,8 +63,7 @@ SortedMap rows = new TreeMap<>(); // NDV will be 3 for the 
bitVectors - String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}"; - + String bitVectors = null; @Before public void before() throws IOException { MockitoAnnotations.initMocks(this); @@ -71,6 +71,11 @@ public void before() throws IOException { conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true); store = MockUtils.init(conf, htable, rows); store.backdoor().getStatsCache().resetCounters(); + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addLong(1); + hll.addLong(2); + hll.addLong(3); + bitVectors = hll.serialize(); } private static interface Checker { @@ -395,7 +400,7 @@ public void noPartitionsHaveBitVectorStatus() throws Exception { dcsd.setHighValue(1000 + i); dcsd.setLowValue(-1000 - i); dcsd.setNumNulls(i); - dcsd.setNumDVs(10 * i); + dcsd.setNumDVs(i == 0 ? 1 : 10 * i); data.setLongStats(dcsd); obj.setStatsData(data); cs.addToStatsObj(obj); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java index 74e16695a9..0ad27806d1 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.api.AggrStats; @@ -61,9 +62,8 @@ SortedMap rows = new TreeMap<>(); // NDV will be 3 for bitVectors[0] and 1 for bitVectors[1] - String bitVectors[] = { - "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 
2}{0, 9}{0, 1, 4}", - "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" }; + String bitVectors[] = new String[2]; + @Before public void before() throws IOException { @@ -73,6 +73,15 @@ public void before() throws IOException { conf.setBoolean(HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION.varname, true); store = MockUtils.init(conf, htable, rows); store.backdoor().getStatsCache().resetCounters(); + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addLong(1); + bitVectors[1] = hll.serialize(); + hll = HyperLogLog.builder().build(); + hll.addLong(2); + hll.addLong(3); + hll.addLong(3); + hll.addLong(4); + bitVectors[0] = hll.serialize(); } private static interface Checker { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 97bf839ae1..16c440fc61 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -3396,7 +3396,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, ColStatistics.Range r = cs.getRange(); StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? 
null : r.maxValue.toString(), - cs.getNumNulls(), cs.getCountDistint(), cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses()); + cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses()); ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data); colStats = Collections.singletonList(cso); StatsSetupConst.setColumnStatsState(tblProps, colNames); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index aa77234c28..23800734f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -695,38 +695,40 @@ private static void formatWithIndentation(String colName, String colType, String ColumnStatisticsData csd = cso.getStatsData(); if (csd.isSetBinaryStats()) { BinaryColumnStatsData bcsd = csd.getBinaryStats(); - appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(), + appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", bcsd.getAvgColLen(), bcsd.getMaxColLen(), "", ""); } else if (csd.isSetStringStats()) { StringColumnStatsData scsd = csd.getStringStats(); appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(), - scsd.getAvgColLen(), scsd.getMaxColLen(), "", ""); + scsd.getBitVectors() == null ? 
"" : scsd.getBitVectors(), scsd.getAvgColLen(), + scsd.getMaxColLen(), "", ""); } else if (csd.isSetBooleanStats()) { BooleanColumnStatsData bcsd = csd.getBooleanStats(); - appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "", + appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "", "", bcsd.getNumTrues(), bcsd.getNumFalses()); } else if (csd.isSetDecimalStats()) { DecimalColumnStatsData dcsd = csd.getDecimalStats(); appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(), + dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", ""); } else if (csd.isSetDoubleStats()) { DoubleColumnStatsData dcsd = csd.getDoubleStats(); appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(), - dcsd.getNumDVs(), "", "", "", ""); + dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", ""); } else if (csd.isSetLongStats()) { LongColumnStatsData lcsd = csd.getLongStats(); appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(), - lcsd.getNumDVs(), "", "", "", ""); + lcsd.getNumDVs(), lcsd.getBitVectors() == null ? "" : lcsd.getBitVectors(), "", "", "", ""); } else if (csd.isSetDateStats()) { DateColumnStatsData dcsd = csd.getDateStats(); appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), - dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", ""); + dcsd.getNumNulls(), dcsd.getNumDVs(), dcsd.getBitVectors() == null ? 
"" : dcsd.getBitVectors(), "", "", "", ""); } } else { - appendColumnStats(tableInfo, "", "", "", "", "", "", "", ""); + appendColumnStats(tableInfo, "", "", "", "", "", "", "", "", ""); } } @@ -779,7 +781,7 @@ private static void printPadding(StringBuilder tableInfo, int[] columnWidths) { } private static void appendColumnStats(StringBuilder sb, Object min, Object max, Object numNulls, - Object ndv, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) { + Object ndv, Object bitVector, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) { sb.append(String.format("%-" + ALIGNMENT + "s", min)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", max)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", numNulls)).append(FIELD_DELIM); @@ -788,6 +790,7 @@ private static void appendColumnStats(StringBuilder sb, Object min, Object max, sb.append(String.format("%-" + ALIGNMENT + "s", maxColLen)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", numTrues)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", numFalses)).append(FIELD_DELIM); + sb.append(String.format("%-" + ALIGNMENT + "s", bitVector)).append(FIELD_DELIM); } private static void appendColumnStatsNoFormatting(StringBuilder sb, Object min, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java index 41a1c7a582..f2d2e2dc0b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -18,9 +18,6 @@ package org.apache.hadoop.hive.ql.plan; -import org.apache.hadoop.hive.ql.stats.StatsUtils; - - public class ColStatistics { private String colName; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java index d7a9888389..845ffcfb53 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java @@ -59,8 +59,8 @@ public void setPartSpec(Map partSpec) { */ private static final String schema = "col_name,data_type,comment#string:string:string"; private static final String colStatsSchema = "col_name,data_type,min,max,num_nulls," - + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment" - + "#string:string:string:string:string:string:string:string:string:string:string"; + + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment,bitVector" + + "#string:string:string:string:string:string:string:string:string:string:string:string"; public DescTableDesc() { } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 2d56950cb1..8ee41bfab2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -23,9 +23,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.common.ndv.FMSketch; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.common.ndv.fm.FMSketch; import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Description; diff --git a/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q b/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q new file mode 100644 index 0000000000..d64263fe84 --- /dev/null +++ b/ql/src/test/queries/clientpositive/alter_table_update_status_disable_bitvector.q @@ 
-0,0 +1,139 @@ +set hive.stats.fetch.bitvector=false; + +create table src_stat as select * from src1; + +create table src_stat_int ( + key double, + value string +); + +LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int; + +ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key; + +describe formatted src_stat key; + +ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111'); + +describe formatted src_stat key; + +ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124'); + +describe formatted src_stat value; + +ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key; + +describe formatted src_stat_int key; + +ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22'); + +describe formatted src_stat_int key; + + + +create database if not exists dummydb; + +use dummydb; + +ALTER TABLE default.src_stat UPDATE STATISTICS for column key SET ('numDVs'='3333','avgColLen'='2.222'); + +describe formatted default.src_stat key; + +ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235'); + +describe formatted default.src_stat value; + +use default; + +drop database dummydb; + +create table datatype_stats( + t TINYINT, + s SMALLINT, + i INT, + b BIGINT, + f FLOAT, + d DOUBLE, + dem DECIMAL, --default decimal (10,0) + ts TIMESTAMP, + dt DATE, + str STRING, + v VARCHAR(12), + c CHAR(5), + bl BOOLEAN, + bin BINARY); + +INSERT INTO datatype_stats values(2, 3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin'); +INSERT INTO datatype_stats values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +DESC FORMATTED datatype_stats s; +DESC FORMATTED datatype_stats i; +DESC FORMATTED datatype_stats b; 
+DESC FORMATTED datatype_stats f; +DESC FORMATTED datatype_stats d; +DESC FORMATTED datatype_stats dem; +DESC FORMATTED datatype_stats ts; +DESC FORMATTED datatype_stats dt; +DESC FORMATTED datatype_stats str; +DESC FORMATTED datatype_stats v; +DESC FORMATTED datatype_stats c; +DESC FORMATTED datatype_stats bl; +DESC FORMATTED datatype_stats bin; + +--tinyint +DESC FORMATTED datatype_stats t; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35'); +DESC FORMATTED datatype_stats t; +--smallint +DESC FORMATTED datatype_stats s; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25'); +DESC FORMATTED datatype_stats s; +--int +DESC FORMATTED datatype_stats i; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5'); +DESC FORMATTED datatype_stats i; +--bigint +DESC FORMATTED datatype_stats b; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8'); +DESC FORMATTED datatype_stats b; + +--float +DESC FORMATTED datatype_stats f; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00'); +DESC FORMATTED datatype_stats f; +--double +DESC FORMATTED datatype_stats d; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455'); +DESC FORMATTED datatype_stats d; +--decimal +DESC FORMATTED datatype_stats dem; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0'); +DESC FORMATTED datatype_stats dem; +--timestamp +DESC FORMATTED datatype_stats ts; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for 
column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924'); +DESC FORMATTED datatype_stats ts; +--date +DESC FORMATTED datatype_stats dt; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04'); +DESC FORMATTED datatype_stats dt; +--string +DESC FORMATTED datatype_stats str; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235'); +DESC FORMATTED datatype_stats str; +--varchar +DESC FORMATTED datatype_stats v; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25'); +DESC FORMATTED datatype_stats v; +--char +DESC FORMATTED datatype_stats c; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58'); +DESC FORMATTED datatype_stats c; +--boolean +DESC FORMATTED datatype_stats bl; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8'); +DESC FORMATTED datatype_stats bl; +--binary +DESC FORMATTED datatype_stats bin; +ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8'); +DESC FORMATTED datatype_stats bin; + diff --git a/ql/src/test/queries/clientpositive/bitvector.q b/ql/src/test/queries/clientpositive/bitvector.q new file mode 100644 index 0000000000..d8669f254b --- /dev/null +++ b/ql/src/test/queries/clientpositive/bitvector.q @@ -0,0 +1,3 @@ +set hive.mapred.mode=nonstrict; + +desc formatted src key; diff --git a/ql/src/test/queries/clientpositive/fm-sketch.q b/ql/src/test/queries/clientpositive/fm-sketch.q new file mode 100644 index 0000000000..6a65442076 --- /dev/null +++ b/ql/src/test/queries/clientpositive/fm-sketch.q @@ -0,0 +1,58 @@ +set 
hive.mapred.mode=nonstrict; +set hive.stats.ndv.algo=fm; + +create table n(key int); + +insert overwrite table n select null from src; + +explain analyze table n compute statistics for columns; + +analyze table n compute statistics for columns; + +desc formatted n key; + + +create table i(key int); + +insert overwrite table i select key from src; + +explain analyze table i compute statistics for columns; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key double); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key decimal); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key date); + +insert into i values ('2012-08-17'); +insert into i values ('2012-08-17'); +insert into i values ('2013-08-17'); +insert into i values ('2012-03-17'); +insert into i values ('2012-05-17'); + +analyze table i compute statistics for columns; + +desc formatted i key; + diff --git a/ql/src/test/queries/clientpositive/hll.q b/ql/src/test/queries/clientpositive/hll.q index edfdce8a29..91c4e788d3 100644 --- a/ql/src/test/queries/clientpositive/hll.q +++ b/ql/src/test/queries/clientpositive/hll.q @@ -1,5 +1,16 @@ set hive.mapred.mode=nonstrict; +create table n(key int); + +insert overwrite table n select null from src; + +explain analyze table n compute statistics for columns; + +analyze table n compute statistics for columns; + +desc formatted n key; + + create table i(key int); insert overwrite table i select key from src; diff --git a/ql/src/test/results/clientpositive/alterColumnStats.q.out b/ql/src/test/results/clientpositive/alterColumnStats.q.out index 519a62a190..ea2416f6fb 100644 --- a/ql/src/test/results/clientpositive/alterColumnStats.q.out +++ 
b/ql/src/test/results/clientpositive/alterColumnStats.q.out @@ -142,17 +142,17 @@ PREHOOK: Input: default@p POSTHOOK: query: desc formatted p c1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@p -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: desc formatted p c2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@p POSTHOOK: query: desc formatted p c2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@p -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c2 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c2 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} diff --git a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out index 672bd9f4bb..a315a6be39 100644 --- a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out +++ b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out @@ -65,9 +65,9 @@ PREHOOK: Input: default@p POSTHOOK: query: desc formatted p partition (c=1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@p -# col_name data_type comment - -a int from deserializer +# col_name data_type comment + +a int from deserializer PREHOOK: query: desc formatted p partition (c=1) PREHOOK: type: DESCTABLE PREHOOK: Input: default@p diff --git a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out index 
c0d4eeefb4..700f07fcab 100644 --- a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out @@ -34,9 +34,11 @@ PREHOOK: Input: default@src_stat_part_one POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_one -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer PREHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS POSTHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') @@ -47,9 +49,11 @@ PREHOOK: Input: default@src_stat_part_one POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_one -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 11 2.2 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 11 2.2 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer PREHOOK: query: create table src_stat_part_two(key string, value string) partitioned by (px int, py string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -86,9 +90,11 @@ PREHOOK: Input: default@src_stat_part_two 
POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_two -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer PREHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS POSTHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') @@ -99,9 +105,11 @@ PREHOOK: Input: default@src_stat_part_two POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_two -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 30 1.72 40 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 30 1.72 40 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb @@ -124,9 +132,11 @@ PREHOOK: Input: default@src_stat_part_two POSTHOOK: query: describe formatted default.src_stat_part_two PARTITION(px=1, py='a') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_two -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 40 1.72 50 from 
deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 40 1.72 50 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default diff --git a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out index 96dce1e2c5..3676204acf 100644 --- a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out +++ b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out @@ -123,30 +123,33 @@ PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable0 rename to statsdb1.testtable1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable0 @@ -199,30 +202,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + 
+col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table testtable1 replace columns (col1 int, 
col2 string, col4 string) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: statsdb1@testtable1 @@ -274,30 +280,32 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: alter table testtable1 change col1 col1 string PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: statsdb1@testtable1 @@ -349,30 +357,31 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -425,30 +434,31 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: analyze table testpart0 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testpart0 @@ -549,27 +559,30 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted 
statsdb1.testpart0 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -616,27 +629,32 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 PREHOOK: 
type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: alter table statsdb1.testpart0 rename to statsdb1.testpart1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart0 @@ -735,27 +753,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer 
PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -802,27 +823,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 
SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: alter table statsdb1.testpart1 partition (part = 'part1') rename to partition (part = 'part11') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: statsdb1@testpart1 @@ -922,27 +948,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - 
-col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -989,27 +1018,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len 
max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: alter table statsdb1.testpart1 replace columns (col1 int, col2 string, col4 string) cascade PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: statsdb1@testpart1 @@ -1111,27 +1145,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted 
statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1178,27 +1214,31 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 change column col1 col1 string cascade PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: statsdb1@testpart1 @@ -1300,27 +1340,28 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type 
comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1367,27 +1408,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 rename to statsdb2.testpart2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart1 @@ -1446,54 +1489,57 @@ PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 
partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: use statsdb2 PREHOOK: type: 
SWITCHDATABASE PREHOOK: Input: database:statsdb2 @@ -1663,30 +1709,33 @@ PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name 
data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable0 rename to statsdb1.testtable1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable0 @@ -1739,30 +1788,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table testtable1 replace columns (col1 int, col2 string, col4 string) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: statsdb1@testtable1 @@ -1814,30 +1866,32 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment 
bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: alter table testtable1 change col1 col1 string PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: statsdb1@testtable1 @@ -1889,30 +1943,31 @@ PREHOOK: Input: 
statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment 
bitVector + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -1965,30 +2020,31 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type 
min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: analyze table testpart0 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testpart0 @@ -2089,27 +2145,30 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: 
query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -2156,27 +2215,32 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: 
describe formatted statsdb1.testpart0 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: alter table statsdb1.testpart0 rename to statsdb1.testpart1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart0 @@ -2275,27 +2339,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 
SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2342,27 +2409,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: alter table statsdb1.testpart1 partition (part = 'part1') rename to partition (part = 'part11') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: statsdb1@testpart1 @@ -2462,27 +2534,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len 
max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2529,27 +2604,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer PREHOOK: query: alter table statsdb1.testpart1 replace columns (col1 int, col2 string, col4 string) cascade PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: statsdb1@testpart1 @@ -2651,27 +2731,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: 
statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2718,27 +2800,31 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: 
type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 change column col1 col1 string cascade PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: statsdb1@testpart1 @@ -2840,27 +2926,28 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 
SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2907,27 +2994,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 rename to statsdb2.testpart2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart1 @@ -2986,54 +3075,57 @@ PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: use statsdb2 PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:statsdb2 diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out index 9cd9a8dbe0..f23ba5753f 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -46,10 +46,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses 
comment - -key string 0 16 1.72 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') @@ -60,10 +62,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 1111 1.111 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 1111 1.111 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') @@ -74,10 +78,10 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat value POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 122 121 1.23 124 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 122 121 1.23 124 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_int @@ -92,10 +96,12 @@ PREHOOK: Input: default@src_stat_int POSTHOOK: query: describe formatted src_stat_int key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_int -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key double 66.0 406.0 10 15 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key double 66.0 406.0 10 15 SExM4A8PgZLrJoLyx3uBrPspvqnUPoHIoA/+prAWgPaQT4Du5BLDosR5vZLrGIDtbYDVh+QBwKHW +UIOz9UG+ouNE + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') @@ -106,10 +112,12 @@ PREHOOK: Input: default@src_stat_int POSTHOOK: query: describe formatted src_stat_int key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_int -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses 
comment - -key double 333.22 22.22 10 2222 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key double 333.22 22.22 10 2222 SExM4A8PgZLrJoLyx3uBrPspvqnUPoHIoA/+prAWgPaQT4Du5BLDosR5vZLrGIDtbYDVh+QBwKHW +UIOz9UG+ouNE + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb @@ -132,10 +140,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted default.src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 3333 2.222 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 3333 2.222 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') @@ -146,10 +156,10 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted default.src_stat value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses 
comment - -value string 233 232 2.34 235 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 233 232 2.34 235 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default @@ -246,140 +256,140 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats s POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -s smallint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +s smallint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats i PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats i POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -i int from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +i int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats b PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len 
max_col_len num_trues num_falses comment - -b bigint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b bigint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats f PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats f POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -f float from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +f float from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats d PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d double from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +d double from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats dem PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dem POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dem decimal(10,0) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min 
max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dem decimal(10,0) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats ts PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ts timestamp from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ts timestamp from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats dt PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dt date from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dt date from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats str PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats str POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -str string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +str string from deserializer 
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats v PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v varchar(12) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +v varchar(12) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats c PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c char(5) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c char(5) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats bl PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bl POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bl boolean from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bl boolean from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats bin PREHOOK: type: DESCTABLE PREHOOK: Input: 
default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bin POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bin binary from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bin binary from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats t PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats t POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -t tinyint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +t tinyint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35') @@ -390,20 +400,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats t POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -t tinyint 35 234 233 232 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment bitVector + +t tinyint 35 234 233 232 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats s PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats s POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -s smallint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +s smallint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25') @@ -414,20 +424,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats s POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -s smallint 25 489 56 56 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +s smallint 25 489 56 56 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats i PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats 
POSTHOOK: query: DESC FORMATTED datatype_stats i POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -i int from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +i int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5') @@ -438,20 +448,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats i POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -i int 5 889 1 59 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +i int 5 889 1 59 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats b PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b bigint from 
deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b bigint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8') @@ -462,20 +472,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b bigint 8 89 14 9 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b bigint 8 89 14 9 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats f PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats f POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -f float from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max 
num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +f float from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00') @@ -486,20 +496,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats f POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -f float 8.0 2345.656 45 563 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +f float 8.0 2345.656 45 563 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats d PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d double from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment bitVector + +d double from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455') @@ -510,20 +520,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d double 0.00455 560.3367 12 5677 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +d double 0.00455 560.3367 12 5677 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats dem PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dem POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dem decimal(10,0) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment bitVector + +dem decimal(10,0) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0') @@ -534,20 +544,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dem POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dem decimal(10,0) 0 560 912 57 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dem decimal(10,0) 0 560 912 57 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats ts PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ts timestamp from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ts timestamp from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924') @@ -558,20 +568,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ts timestamp 1357030924 1357030923 12 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ts timestamp 1357030924 1357030923 12 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats dt PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# 
col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dt date from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dt date from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04') @@ -582,20 +592,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dt date 2001-02-04 2012-01-01 912 57 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dt date 2001-02-04 2012-01-01 912 57 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: 
DESC FORMATTED datatype_stats str PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats str POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -str string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +str string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') @@ -606,20 +616,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats str POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -str string 233 232 2.34 235 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +str string 233 232 2.34 235 from 
deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats v PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v varchar(12) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +v varchar(12) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25') @@ -630,20 +640,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v varchar(12) 33 22 4.4 25 from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +v varchar(12) 33 22 4.4 25 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats c PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c char(5) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c char(5) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58') @@ -654,20 +664,20 @@ PREHOOK: 
Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c char(5) 3 2 9.0 58 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c char(5) 3 2 9.0 58 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats bl PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bl POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bl boolean from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bl boolean from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} 
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8') @@ -678,20 +688,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bl POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bl boolean 1 9 8 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bl boolean 1 9 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats bin PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bin POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bin binary from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type 
min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bin binary from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8') @@ -702,7 +712,7 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bin POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bin binary 8 2.0 8 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bin binary 8 2.0 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out new file mode 100644 index 
0000000000..1dcc1fcf4e --- /dev/null +++ b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out @@ -0,0 +1,708 @@ +PREHOOK: query: create table src_stat as select * from src1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src1 +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat +POSTHOOK: query: create table src_stat as select * from src1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat +POSTHOOK: Lineage: src_stat.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table src_stat_int ( + key double, + value string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat_int +POSTHOOK: query: create table src_stat_int ( + key double, + value string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_int +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@src_stat_int +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@src_stat_int +PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_stat key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted 
src_stat key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 16 1.72 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted src_stat key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted src_stat key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 1111 1.111 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted src_stat value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted src_stat value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 122 121 1.23 124 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} 
+PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat_int +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat_int +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_stat_int key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_int +POSTHOOK: query: describe formatted src_stat_int key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_int +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key double 66.0 406.0 10 15 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted src_stat_int key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_int +POSTHOOK: query: describe formatted src_stat_int key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_int +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key double 333.22 22.22 10 2222 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: create database if not exists dummydb +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:dummydb +POSTHOOK: query: create database if not exists dummydb +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:dummydb +PREHOOK: query: use dummydb +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:dummydb 
+POSTHOOK: query: use dummydb +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:dummydb +PREHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column key SET ('numDVs'='3333','avgColLen'='2.222') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column key SET ('numDVs'='3333','avgColLen'='2.222') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted default.src_stat key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted default.src_stat key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 3333 2.222 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +PREHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted default.src_stat value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted default.src_stat value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 233 232 2.34 235 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +PREHOOK: query: use default +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:default +POSTHOOK: query: use default +POSTHOOK: type: 
SWITCHDATABASE +POSTHOOK: Input: database:default +PREHOOK: query: drop database dummydb +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:dummydb +PREHOOK: Output: database:dummydb +POSTHOOK: query: drop database dummydb +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:dummydb +POSTHOOK: Output: database:dummydb +PREHOOK: query: create table datatype_stats( + t TINYINT, + s SMALLINT, + i INT, + b BIGINT, + f FLOAT, + d DOUBLE, + dem DECIMAL, --default decimal (10,0) + ts TIMESTAMP, + dt DATE, + str STRING, + v VARCHAR(12), + c CHAR(5), + bl BOOLEAN, + bin BINARY) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@datatype_stats +POSTHOOK: query: create table datatype_stats( + t TINYINT, + s SMALLINT, + i INT, + b BIGINT, + f FLOAT, + d DOUBLE, + dem DECIMAL, --default decimal (10,0) + ts TIMESTAMP, + dt DATE, + str STRING, + v VARCHAR(12), + c CHAR(5), + bl BOOLEAN, + bin BINARY) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@datatype_stats +PREHOOK: query: INSERT INTO datatype_stats values(2, 3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin') +PREHOOK: type: QUERY +PREHOOK: Output: default@datatype_stats +POSTHOOK: query: INSERT INTO datatype_stats values(2, 3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@datatype_stats +POSTHOOK: Lineage: datatype_stats.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.bin EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col14, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.bl EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col13, type:string, 
comment:), ] +POSTHOOK: Lineage: datatype_stats.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col12, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.d EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.dem EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.f EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.s EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.str SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col10, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.t EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.v EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col11, type:string, comment:), ] +PREHOOK: query: INSERT INTO datatype_stats values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@datatype_stats +POSTHOOK: query: INSERT INTO datatype_stats values(NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@datatype_stats +POSTHOOK: Lineage: datatype_stats.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.bin EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col14, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.bl EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col13, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col12, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.d EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.dem EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.dt EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.f EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.s EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.str SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col10, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.t EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: 
datatype_stats.ts EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: datatype_stats.v EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col11, type:string, comment:), ] +PREHOOK: query: DESC FORMATTED datatype_stats s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +s smallint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +i int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b bigint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats f +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats f +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +f float from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +d double from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats dem +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats dem +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dem decimal(10,0) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ts timestamp from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats dt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats dt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dt date from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats str +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats str +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: 
default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +str string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats v +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats v +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +v varchar(12) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c char(5) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats bl +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats bl +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bl boolean from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED datatype_stats bin +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats bin +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bin binary from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: DESC FORMATTED 
datatype_stats t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +t tinyint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +t tinyint 35 234 233 232 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +s smallint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats 
UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +s smallint 25 489 56 56 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +i int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +i int 5 889 1 59 from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b bigint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b bigint 8 89 14 9 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats f +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats f +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +f float from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats f +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats f +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +f float 8.0 2345.656 45 563 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +d double from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455') +POSTHOOK: type: 
ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +d double 0.00455 560.3367 12 5677 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats dem +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats dem +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dem decimal(10,0) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats dem +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats dem +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dem decimal(10,0) 0 560 912 57 from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ts timestamp from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ts timestamp 1357030924 1357030923 12 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats dt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats dt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name 
data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dt date from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats dt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats dt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +dt date 2001-02-04 2012-01-01 912 57 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats str +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats str +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +str string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +PREHOOK: query: ALTER TABLE 
default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats str +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats str +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +str string 233 232 2.34 235 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats v +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats v +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +v varchar(12) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25') +POSTHOOK: type: 
ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats v +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats v +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +v varchar(12) 33 22 4.4 25 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c char(5) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment bitVector + +c char(5) 3 2 9.0 58 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats bl +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats bl +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bl boolean from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats bl +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats bl +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bl boolean 1 9 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} 
+PREHOOK: query: DESC FORMATTED datatype_stats bin +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats bin +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bin binary from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: DESC FORMATTED datatype_stats bin +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats +POSTHOOK: query: DESC FORMATTED datatype_stats bin +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bin binary 8 2.0 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out index 6a3fbc0cc7..0de0a3a385 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ 
b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -48,9 +48,11 @@ PREHOOK: Input: default@src_stat_part POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer PREHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key, value PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part @@ -69,18 +71,22 @@ PREHOOK: Input: default@src_stat_part POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) value PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_stat_part POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 19 4.92 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment bitVector + +value string 0 19 4.92 7 SExM4BMTgaTbFcCikRTAp44YwK72BIGdshzAtN4dgfC7Ab6ikDTAz6JGgejDCP+AlzSA84UvwYTL +Wr+ivynA6+uCAsDjm8kBgri1Ab++nA+/vawa + from deserializer PREHOOK: query: create table src_stat_string_part(key string, value string) partitioned by (partitionName string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out index e3abba5bd0..29963975d3 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out @@ -192,10 +192,11 @@ PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 1 4 0 4 from deserializer +col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a int 1 4 0 4 SExM4AQExfO+SLy7rGKA4vdMwPD8wQI= + from deserializer PREHOOK: query: alter table partitioned1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@partitioned1 @@ -434,10 +435,11 @@ PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=2) c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c int 10 40 0 4 from deserializer +col_name 
data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c int 10 40 0 4 SExM4AQEguSTlQGB4f34Ab/okIMC/4XTfQ== + from deserializer PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') PREHOOK: type: QUERY POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') @@ -626,17 +628,19 @@ PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 1 6 0 4 from deserializer +col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a int 1 6 0 6 SExM4AYGwZn6L4TaxBi8u6xigOL3TMCSiwGA3vHAAg== + from deserializer PREHOOK: query: desc formatted partitioned1 partition(part=1) c PREHOOK: type: DESCTABLE PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=1) c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c int 100 200 0 2 from deserializer +col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector +# col_name data_type min 
max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c int 100 200 0 2 SExM4AICweD/2gaAj/YU + from deserializer diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out index 06f23b1e7c..e32c884c7d 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out @@ -250,17 +250,59 @@ PREHOOK: Input: default@dest_j1 POSTHOOK: query: desc formatted dest_j1 key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest_j1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb +YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO +vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi +AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe +A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ 
+we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD +gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl +Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA +t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: desc formatted dest_j1 value PREHOOK: type: DESCTABLE PREHOOK: Input: default@dest_j1 POSTHOOK: query: desc formatted dest_j1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest_j1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.834630350194552 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 309 6.834630350194552 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W 
+sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out b/ql/src/test/results/clientpositive/avro_decimal.q.out index e1045ebea1..07dca397a1 100644 --- a/ql/src/test/results/clientpositive/avro_decimal.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal.q.out @@ -32,10 +32,11 @@ PREHOOK: Input: default@dec POSTHOOK: query: DESC FORMATTED `dec` value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value decimal(8,4) -12.25 234.79 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value decimal(8,4) -12.25 234.79 0 10 
SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/avro_decimal_native.q.out index b73b5f5679..9f8d4c6465 100644 --- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out @@ -36,10 +36,11 @@ PREHOOK: Input: default@dec POSTHOOK: query: DESC FORMATTED `dec` value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value decimal(8,4) -12.25 234.79 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value decimal(8,4) -12.25 234.79 0 10 SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec diff --git a/ql/src/test/results/clientpositive/bitvector.q.out b/ql/src/test/results/clientpositive/bitvector.q.out new file mode 100644 index 0000000000..21859d2fc1 --- /dev/null +++ b/ql/src/test/results/clientpositive/bitvector.q.out @@ -0,0 +1,31 @@ +PREHOOK: query: desc formatted src key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src +POSTHOOK: query: desc formatted src key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 309 2.812 3 
SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/char_udf1.q.out b/ql/src/test/results/clientpositive/char_udf1.q.out index fefc7407e0..e701d64357 100644 --- a/ql/src/test/results/clientpositive/char_udf1.q.out +++ b/ql/src/test/results/clientpositive/char_udf1.q.out @@ -406,7 +406,7 
@@ from char_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} PREHOOK: query: select min(c2), min(c4) diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out index 0f2822504f..ca1ec00634 100644 --- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out +++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out @@ -41,20 +41,22 @@ PREHOOK: Input: default@all_nulls POSTHOOK: query: describe formatted all_nulls a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a bigint 0 0 5 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a bigint 0 0 5 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: describe formatted all_nulls b PREHOOK: type: DESCTABLE PREHOOK: Input: default@all_nulls 
POSTHOOK: query: describe formatted all_nulls b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b double 0.0 0.0 5 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b double 0.0 0.0 5 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: drop table all_nulls PREHOOK: type: DROPTABLE PREHOOK: Input: default@all_nulls diff --git a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out index fb833bccb2..74085bf0c4 100644 --- a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out +++ b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out @@ -48,10 +48,10 @@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + + left string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: insert into space values ("1", "2", "3") PREHOOK: type: QUERY PREHOOK: Output: default@space @@ -67,10 +67,11 
@@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string 0 1 1.0 1 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + + left string 0 1 1.0 1 SExM4AEBxbi8+AQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: select * from space PREHOOK: type: QUERY PREHOOK: Input: default@space diff --git a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out index 9925928da7..00e53dc3e9 100644 --- a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out +++ b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out @@ -173,17 +173,19 @@ PREHOOK: Input: default@dest1 POSTHOOK: query: desc formatted DEST1 key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 10 10 0 1 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 10 10 0 1 SExM4AEBg8WRjgM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: desc formatted DEST1 value PREHOOK: type: DESCTABLE PREHOOK: Input: default@dest1 POSTHOOK: query: desc formatted DEST1 value 
POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 1 4.0 4 SExM4AEBg7CVmgY= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 5ecb20501b..c0f007159d 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -509,18 +509,20 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 1 12 
4.3076923076923075 6 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeName string 1 12 4.3076923076923075 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + from deserializer PREHOOK: query: explain analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY @@ -598,18 +600,20 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: explain analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY @@ -679,10 +683,11 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part employeeID POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 2 12 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 34 2 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb @@ -711,10 +716,11 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} PREHOOK: query: analyze table default.Employee_Part compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@employee_part diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index a64c76badf..0cb4863a17 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ 
b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -141,9 +141,10 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='4000.0', country='USA') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 7 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeName string 0 7 5.142857142857143 6 SExM4AcHhN+NPL2pzAqA8p0tgLvPcIPS5KcCvbS+dMC7gIYB + from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID PREHOOK: type: QUERY @@ -221,18 +222,20 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='USA') employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 31 0 7 from 
deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 31 0 7 SExM4AcHw4SxaoX10lX5nt07xsfQ5AH8u4h+gtXeeb2uipsB + from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID PREHOOK: type: QUERY @@ -318,9 +321,10 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='3000.0', country='UK') employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns PREHOOK: type: QUERY @@ -406,9 +410,10 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='3500.0', country='UK') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 12 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeName string 0 12 5.142857142857143 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + from deserializer PREHOOK: query: drop table Employee PREHOOK: type: DROPTABLE POSTHOOK: query: drop table Employee @@ -481,9 +486,10 @@ PREHOOK: Input: default@employee POSTHOOK: query: describe 
formatted Employee partition (employeeSalary='3500.0', country='UK') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 12 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeName string 0 12 5.142857142857143 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + from deserializer PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -528,9 +534,10 @@ PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='3000.0', country='USA') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 12 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeName string 0 12 5.142857142857143 6 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + from deserializer PREHOOK: query: alter table Employee add columns (c int ,d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@employee @@ -564,24 +571,27 @@ PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 9 4.777777777777778 6 from deserializer +# col_name data_type min max 
num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +employeeName string 0 9 4.777777777777778 6 SExM4AkJhN+NPL2pzAqA8p0tgLvPcIPS5KcCvbS+dMC7gIYBwJnbgQGAz/1W + from deserializer PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') c PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c int 2000 4000 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +c int 2000 4000 0 3 SExM4AMDwpKn6wH/9JpogbzaCQ== + from deserializer PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d string 0 2 2.4444444444444446 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +d string 0 2 2.4444444444444446 3 SExM4AICgaD/7QKE/4mqAw== + from deserializer diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 91c8f150a2..b85c1ff721 100644 --- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -285,30 +285,39 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: describe formatted UserVisits_web_text_none destURL POSTHOOK: type: 
DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -destURL string 0 55 48.945454545454545 96 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +destURL string 0 55 48.945454545454545 96 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA +ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH +/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA +vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: describe formatted UserVisits_web_text_none adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len 
max_col_len num_trues num_falses comment bitVector + +adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b +Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -avgTimeOnSite int 1 9 0 9 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE empty_tab( a int, b double, @@ -414,10 +423,14 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: 
describe formatted default.UserVisits_web_text_none destURL POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -destURL string 0 55 48.945454545454545 96 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +destURL string 0 55 48.945454545454545 96 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA +ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH +/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA +vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( sourceIP string, destURL string, @@ -707,30 +720,39 @@ PREHOOK: Input: dummydb@uservisits_in_dummy_db POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -destURL string 0 55 48.945454545454545 96 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} 
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +destURL string 0 55 48.945454545454545 96 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA +ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH +/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA +vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b +Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from deserializer +COLUMN_STATS_ACCURATE 
{\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -avgTimeOnSite int 1 9 0 9 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db PREHOOK: type: DROPTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db diff --git a/ql/src/test/results/clientpositive/compustat_avro.q.out b/ql/src/test/results/clientpositive/compustat_avro.q.out index 2f8dc10e50..db20bef04e 100644 --- a/ql/src/test/results/clientpositive/compustat_avro.q.out +++ b/ql/src/test/results/clientpositive/compustat_avro.q.out @@ -30,10 +30,10 @@ PREHOOK: Input: default@testavro POSTHOOK: query: describe formatted testAvro col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testavro -# col_name data_type 
min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3 PREHOOK: type: QUERY PREHOOK: Input: default@testavro @@ -48,7 +48,7 @@ PREHOOK: Input: default@testavro POSTHOOK: query: describe formatted testAvro col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testavro -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string 0 0 0.0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 string 0 0 0.0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out index 5cd2180108..78d04f9dfc 100644 --- a/ql/src/test/results/clientpositive/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -109,10 +109,12 @@ PREHOOK: Input: default@tab_date POSTHOOK: query: describe formatted tab_date fl_date 
POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tab_date -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -fl_date date 2000-11-20 2010-10-29 0 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +fl_date date 2000-11-20 2010-10-29 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy +/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} PREHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') @@ -123,7 +125,9 @@ PREHOOK: Input: default@tab_date POSTHOOK: query: describe formatted tab_date fl_date POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tab_date -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -fl_date date 1970-01-01 2015-01-01 0 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +fl_date date 1970-01-01 2015-01-01 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy +/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out index fcfce78b82..e18b989062 100644 --- a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out +++ 
b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 18) from tab_decimal POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_decimal #### A masked pattern was here #### -{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}{0, 1, 2, 3, 5}{0, 1, 3}{0, 1, 2, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 6, 8}{0, 1, 2, 3}{0, 1, 2}{0, 1, 4, 5}"} +{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"Rk0SAB8AAAAvAAAADwAAAAcAAAAHAAAALwAAAAsAAAAXAAAALwAAAA8AAAAHAAAAAwAAAAcAAAAP\r\nAAAARwEAAA8AAAAHAAAAMwAAAA==\r\n"} diff --git a/ql/src/test/results/clientpositive/compute_stats_double.q.out b/ql/src/test/results/clientpositive/compute_stats_double.q.out index e6a087dd98..d937c3a002 100644 --- a/ql/src/test/results/clientpositive/compute_stats_double.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_double.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_double POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_double #### A masked pattern was here #### -{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2}{0, 1}{0, 1, 3, 4}{0, 1, 3}{0, 1, 2, 3, 8}{0, 1, 3}{0, 1, 2}{0, 1, 4}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2, 3, 4}{0, 1, 2}{0, 1, 2, 3, 4}{0, 1, 3}"} +{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"Rk0QAB8AAAAHAAAAAwAAABsAAAALAAAADwEAAAsAAAAHAAAAEwAAAAcAAAAPAAAADwAAAB8AAAAH\r\nAAAAHwAAAAsAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/compute_stats_long.q.out b/ql/src/test/results/clientpositive/compute_stats_long.q.out index fb985d8266..3451072a1b 100644 --- 
a/ql/src/test/results/clientpositive/compute_stats_long.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_long.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_int POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### -{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3}{0, 2, 5}{0, 1, 2, 3, 4}{0, 1, 2, 4, 6, 7}{0, 1, 2, 4}{0, 1, 2, 4, 5}{0, 1, 2, 5}{0, 1, 2}{0, 1, 2, 3}{0, 1, 3, 4}{0, 1, 2, 5, 6}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 2, 3, 10}{0, 1, 2, 4}"} +{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"Rk0QAA8AAAAlAAAAHwAAANcAAAAXAAAANwAAACcAAAAHAAAADwAAABsAAABnAAAADwAAAAsAAAAP\r\nAAAADwQAABcAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/compute_stats_string.q.out b/ql/src/test/results/clientpositive/compute_stats_string.q.out index a5d66eba31..bbb236150e 100644 --- a/ql/src/test/results/clientpositive/compute_stats_string.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_string.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_string POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_string #### A masked pattern was here #### -{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"{0, 1, 2, 3}{0, 1}{0, 1, 3}{0, 2}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 3}{0, 1}{0, 1}{0, 1, 2, 4}{0, 1, 4}{0, 2, 4}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}"} +{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"Rk0QAA8AAAADAAAACwAAAAUAAAAPAAAACwAAAA8AAAALAAAAAwAAAAMAAAAXAAAAEwAAABUAAAAP\r\nAAAABwAAAAcAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index 5593e422b6..559f05e598 100644 --- 
a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -14,10 +14,31 @@ PREHOOK: Input: default@src POSTHOOK: query: describe formatted src key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC 
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended src1 PREHOOK: type: DESCTABLE PREHOOK: Input: default@src1 @@ -34,10 +55,12 @@ PREHOOK: Input: default@src1 POSTHOOK: query: describe formatted src1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 19 4.92 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 19 4.92 7 SExM4BMTgaTbFcCikRTAp44YwK72BIGdshzAtN4dgfC7Ab6ikDTAz6JGgejDCP+AlzSA84UvwYTL +Wr+ivynA6+uCAsDjm8kBgri1Ab++nA+/vawa + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended src_json PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_json @@ -53,10 +76,11 @@ PREHOOK: Input: default@src_json POSTHOOK: query: describe formatted src_json json POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_json -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -json string 0 1 644.0 644 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"json\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +json string 0 1 
644.0 644 SExM4AEBhZK/6AY= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"json\":\"true\"}} PREHOOK: query: describe extended src_sequencefile PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_sequencefile @@ -73,10 +97,31 @@ PREHOOK: Input: default@src_sequencefile POSTHOOK: query: describe formatted src_sequencefile value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_sequencefile -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ 
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcbucket PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket @@ -93,10 +138,38 @@ PREHOOK: Input: default@srcbucket POSTHOOK: query: describe formatted srcbucket value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 430 6.802 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 430 6.802 7 SExM4K4DrgPM7Sz2soMHgPgNw751/KEegclIgPbqBb/h5wOAnfoBgsWzBb7bTcD0a4CqkQPA7mKB +osYB/6nkAcHVGcHekwOA7wS/u44D/7c4wJvkAYLQxAL/7acDgLXtAb/E0QOB1IQGwKwcv76qAsCV +swLAzlqA4JsEgcNMv9SGCcDHgAWC2Ei+18ACwLCLBcCdmAGBuSGAhBmFr5AB+tCzAcGDswGC8n39 +6cQDwPDhA8GT1AG/klbAwtYBgvP0Av7EB4GRvQL/0voFgeMggv1uwFj9i2iC06gBgJQH/rXFAYGC +lwGAijqB5oEB/5DfDP+2RYGo0gP/qKUGgNqHBoKL0AK+3BaA7gvBwq0Ev4j7BcLbmQG+8sQFgLx/ +gIXzAYDdjQHC5rIC/+OKAf/vRsD+BsK12AK+3iDCj6cDwcuXAoC1Bb6hYL+52ATAhvEDgKkLgPWj +AoDJ9wSCiY0B/o/EBID8iALB/12/4poGwL4Sw7JgvdijAcHnqgP/obsCw4ZzveelAYKYVb7VSoCo +/wHA6VjAjJgBwfSUAoD3PMPPygKAsBeAwxO84LoCgYc9v5mVAcDaLsH7kALA+SH/wdQCgLf0AYH6 
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7+rASAzogCgfKyAv/esgHA5SKAog3BlS6/4ckE +wJYOgsI1/qWdAYCxlQPAndwBwPaSAoPN3gH9oMcFgaefAcHzlwP/xdQCv45EwK+ZDMCAmAGBkA+/ +xOUEgZXHAcGHzgT+hdkBw4QvvqmlA/+0mAGA5qwDw7Mkv7edBv6CpgLAj/sCwIV1gf2zBoSuiAH7 +pMMDwvHQAv69yQOC1Ez+wbkCgIngA4Hy3Aa/6LgBwIUMwK1XgIilBsCX9grBvakCv6m4AYGp7wKB +nni/1fkE/98ogNojgJTZBoCWkwTAkPUCgablA/+ApwPA2JoCh8K0AbmZqQKB5kCB9fcCv5bvBMGd +gQP+0F2AvpQEgeChCr/m1AjA5pEBgMWAA8D42wLCwMkB/oMgwYTTBIC0jAT/oD2CvK4C/rQ5gMym +BcPGyQS+1XTDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8ClnALCsOIC/s2t +Af/JsQLAmWCA+akCgrq2A77LxgPBpc4Hv9urBMKGxAT/3o4B/6abB8CL+wTA4qcKgeqxAcDugwK/ +nLICgsOsAv7NyQHA/f4CwOKFA4DygQKA7IwFgIiYAoC23QGAuR3B+yHAqQ6/xRWBm9EBhPaFAby9 +jgO/rbwEwuDXBr7HtAHA+1HAsxfAy6IDhMahBLyaSIHHSf+RyQGAjkvAtxrAt5oDwsiOA4CiDb6m +0gHCt8kCv81ww8V+vseFCsKclgb+5/AB/pWuB8LwUr+u6gKB8Z8BvqwYwOqBAYSbVPzW2QKC8osD +/v7KA8D+eYC5A8GYygLEpZQLu/fNAoDIasPXG8DAOL3ZNYGhF8Cj9gWCl4IB/YFvgeecCIHegAH/ +mosB/6lchIQe/qP+BYChKv6SigGAjH3AtakDg78k/frBAYHGygHA8uQB/8LtAYGulAK/3tcDwZ7H +Ar/UvgWB6Sz/4OICwfyQAf/X9wWBvx+AowGAxY8CgY5j/rVegekTgMQ4v7ApwIryAsDSmQGB+JkD +xcCxAfqrngKA7O0CgKgZhLXJAsCEDP3qS//G5ATBiqQDgcGkAcDijQT+w44BgMCUCMD+HcCY/QHB +togBgM8pgrZ3gNMTvuRLv6JgwpJC/sHNAcK0uQL+vzSA45YEwfqvAoDG/AH/4rwDgrfRA77HpwaB +2Ee/mMcHgafGAr+mzwfBs6sCwdpvgO8X/qq9CIGNpAL/uT+ApoACwIifCoDZBoKwRv62hALA7YcE +gKF2go9LgKqIBL6u4gSB6UWFxmr6+kHByq8O/8TAAcTpsQeA9hv87TuCuLUBv/eKAYCX+AHAhQ7/ +/8kBws2sA8GtgQK+r5MFwfjTBL7DQMGH1gLA1YECgoGcCf2WF8Dc7QKC9/wC/riiAcHXngPDmI8B +/JhghYepB7zEpgP/hReD5z29musBgYCDAYDUF8DgkwbA5vUBv5Ye + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcbucket2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket2 @@ -113,10 +186,31 @@ PREHOOK: Input: default@srcbucket2 POSTHOOK: query: describe formatted srcbucket2 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket2 -# col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 309 6.812 7 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcpart PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart @@ -141,9 +235,30 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted srcpart PARTITION (ds="2008-04-09", hr="12") key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC 
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + from deserializer PREHOOK: query: describe extended alltypesorc PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc @@ -170,37 +285,59 @@ PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc ctinyint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ctinyint tinyint -64 62 3115 127 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ctinyint tinyint -64 62 3115 127 SExM4H9/wffjBsSrxgHA1wz9+IYHg+DQCP3JihW/hsoBwPWWAcCpzQaAka8Nguq/AsK1iAL87ZUO +gJNyw+PsDL27kQLAhqMCwurIAb6YtwuCsqkM/uPMAoHrwgqAxvIFv4+0BsPOQb6yowG/ur4Nx8qY +FbyohgT+yXaA6b0C/8ydAYCV8gKA2pQswJKLAYGJLP/rxgKAk/YDhfPdELvbjQTB5+wEv5WXE8Ch +lAuHqL8IupGWB4O/f76/iwK+iKEJwaPQAf+XygGCnp4fv/WnBcCSzg/AgSSBieICv7yhBb/FvwnC +oMIVv+uBKYWN+wT6/KIGxLiuAf3/uAPCqdEIvabTBYHXviP//KQEgKuDCcGV3Ai/h7wGwdqmA4DO +pwaB1+kGgJuiFP6GugLB6sYcgNvQAb+vZMHalQX/jPYBgaebAoCHiwj/8NQIgeajCb+mUYHomgGA +uLEa/+yOBoTZwAK8pmPAwvUCgNSdB4SjmgW86+QGgZWyAsLt0wL/26AG/9OGDf+a0QSCvZgGvp/J +AYKB0AK+4ZoFwMqsA8CF8QqAy5cCgMjKDoDasB7A3/wYgoa7BP6VxASCvrwD/p2zAoGivgLAhPwX +/+7aD8Hztw7AkIsE/8CDC4GHyxT/zd0EwaXNBcCM4hP/qMsBgaClAr+48AbCqqEI + from 
deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc cfloat PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc cfloat POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cfloat float -64.0 79.5530014038086 3115 131 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +cfloat float -64.0 79.5530014038086 3115 131 SExM4IMBgwGCipYJ/7xdxIybDoK1gwiAsCS79s8MgIafCsHkghfAg+AHvrH2AYHVhgfDuPwIwIsU +voWVAr/C+AiB5IADvtnDCYC79RPA2PkIwozYBb6JtAGA0owCgJ7bBMLTyQ/+j8UHweetA4C3Nb/e +hwaAh7oRwOdSxvHFAvzIpAK+w44ExIbjEb6o7AH/kYodgYKOB//70wy/wdUCwPehFcTdqAX/wuML +vY68B4b06QS7m/4GwL3bB7/RxQOB0twCgLoTw5/mAsS/6we59YkDwd2GC8GD7gW9xLgHwdrsA4Wx +fP3L7Au9usENgM/tDYLx4Ar+n7MJho+kBPrh2ROAs+4Cwc+0CoKbrAP+2f8Ev8mtFcC6lgWAivUC +wKH3B4ff0RH66u0OwM/7C4PflA29yYUBgIyFBIHt3QzB2YsK/aDFEoCT4QPA2bYVwNXRB8Gc0gO/ +xLcMwea/A8GajwP+q58Cga3hCv/vnwTD0LsE/fvlBoCDb8GBygWC+/oK/oaXBcC+CIOT3wb+wu8B +/8j9FMCM9A3/wPgDgqybAb65xAKAm/ICwNErgIbqCsCwiQfAy98IwPumAYTJrwT8i6ECgo/rA4KX +3AmA0A/85FDB0qsCgc2RA4Cw6g/+3+EGgZSjEsC/8An/+cwbwvjlBcCv4Qe+8tILwIDhAYGDxgvA +q6ECv6q2CQ== + from deserializer 
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc ctimestamp1 PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc ctimestamp1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ctimestamp1 timestamp -30 31 3115 35 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ctimestamp1 timestamp -30 31 3115 35 SExM4CMjxdOOGLzQ1BbA9ZYBwKnNBoCRrw3En8gEvKmpLYCSgzWA4vdMwJKLAcGuwk7AqvwNwcjk +OMLQ8Vn9/7gDgKfjMcC9hBa/h7wGwajOCYHyixu+m7FqwML1AoCwmzCAj+odgMuXAoDIyg7Aua03 +gJz/CIDc7wWBor4Cv/PWJ4GEwxKAyNs9/6jLAYKDtxE= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc cboolean2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc cboolean2 POSTHOOK: type: DESCTABLE 
POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cboolean2 boolean 3115 3983 5190 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +cboolean2 boolean 3115 3983 5190 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out index f58a7cc8e1..2742a321cc 100644 --- a/ql/src/test/results/clientpositive/decimal_stats.q.out +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -46,10 +46,11 @@ PREHOOK: Input: default@decimal_1 POSTHOOK: query: desc formatted decimal_1 v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@decimal_1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v decimal(10,0) 500 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\",\"u\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +v decimal(10,0) 500 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\",\"u\":\"true\",\"v\":\"true\"}} PREHOOK: query: explain select * from decimal_1 order by t limit 100 PREHOOK: type: QUERY POSTHOOK: query: explain select * from decimal_1 order by t limit 100 diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 1bae859e2c..d3609f8b6f 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -72,10 +72,10 @@ PREHOOK: Input: default@testdeci2 POSTHOOK: query: describe formatted testdeci2 amount POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -amount decimal(10,3) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +amount decimal(10,3) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git a/ql/src/test/results/clientpositive/describe_syntax.q.out b/ql/src/test/results/clientpositive/describe_syntax.q.out index 19147a1d92..16b7763152 100644 --- a/ql/src/test/results/clientpositive/describe_syntax.q.out +++ b/ql/src/test/results/clientpositive/describe_syntax.q.out @@ -211,10 +211,10 @@ PREHOOK: Input: db1@t1 POSTHOOK: query: DESCRIBE FORMATTED t1 key1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: db1@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key1 int from deserializer -COLUMN_STATS_ACCURATE {} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key1 int from deserializer 
+COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE db1.t1 key1 PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 @@ -235,10 +235,10 @@ PREHOOK: Input: db1@t1 POSTHOOK: query: DESCRIBE FORMATTED db1.t1 key1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: db1@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key1 int from deserializer -COLUMN_STATS_ACCURATE {} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key1 int from deserializer +COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE t1 key1 PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 @@ -259,10 +259,10 @@ PREHOOK: Input: db1@t1 POSTHOOK: query: DESCRIBE FORMATTED t1 key1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: db1@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key1 int from deserializer -COLUMN_STATS_ACCURATE {} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key1 int from deserializer +COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE t1 PARTITION(ds='4', part='5') PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 diff --git a/ql/src/test/results/clientpositive/describe_table.q.out b/ql/src/test/results/clientpositive/describe_table.q.out index 3ba9a7b942..7644fbd61b 100644 --- a/ql/src/test/results/clientpositive/describe_table.q.out +++ b/ql/src/test/results/clientpositive/describe_table.q.out @@ -210,10 +210,31 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted srcpart key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment bitVector + +key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: describe formatted srcpart PARTITION(ds='2008-04-08', hr='12') PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart @@ -302,10 +323,31 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted `srcpart` `key` POSTHOOK: type: 
DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + from 
deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: describe formatted `srcpart` PARTITION(ds='2008-04-08', hr='12') PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart @@ -352,20 +394,20 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted `srcpart` `ds` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ds string 0 2 100.0 100 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ds\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +ds string 0 2 100.0 100 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ds\":\"true\"}} PREHOOK: query: describe formatted `srcpart` `hr` PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted `srcpart` `hr` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -hr string 0 2 100.0 100 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"hr\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +hr string 0 2 100.0 100 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"hr\":\"true\"}} PREHOOK: query: create table srcpart_serdeprops like srcpart PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index 73d4cd7660..7cb62a8a94 100644 --- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -51,9 +51,9 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE 
POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string from deserializer PREHOOK: query: explain analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -242,30 +242,39 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string 0 55 12.763636363636364 13 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB +wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses 
comment - -avgTimeOnSite int 1 9 0 9 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b +Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: CREATE TABLE empty_tab( a int, b double, @@ -292,10 +301,10 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment - -a int from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: explain analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY @@ -361,20 +370,20 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 0 0 0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a int 0 0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: desc formatted empty_tab b PREHOOK: type: DESCTABLE PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b double 0.0 0.0 0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b double 
0.0 0.0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: CREATE DATABASE test PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:test @@ -451,28 +460,32 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string from deserializer PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string from deserializer PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string 0 55 12.763636363636364 13 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB +wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none @@ -495,17 +508,25 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sKeyword string 0 54 7.872727272727273 19 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sKeyword string 0 54 7.872727272727273 19 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out index 1096e9fc64..78511ad62d 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out @@ -59,20 +59,62 @@ PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb +YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO 
+vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi +AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe +A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ +we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD +gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl +Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA +t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: DESCRIBE FORMATTED encrypted_table value PREHOOK: type: DESCTABLE PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name 
data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO encrypted_db.encrypted_table_2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: 
default@encrypted_table @@ -92,20 +134,62 @@ PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb +YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO +vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi +AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe +A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ +we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD 
+gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl +Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA +t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: DESCRIBE FORMATTED encrypted_table value PREHOOK: type: DESCTABLE PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC 
+v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO default.plain_table PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: default@encrypted_table diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index b212da907b..eff8774841 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -81,9 +81,10 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 0.75 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwMOJoQQ= + 0.75 2 from deserializer PREHOOK: query: explain extended select state from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state from loc_orc_1d diff --git 
a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index b5f4feede0..48ee0759b8 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -89,18 +89,20 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 0.75 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwMOJoQQ= + 0.75 2 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 6 3.0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 6 SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7 + 3.0 3 from deserializer PREHOOK: query: explain extended select state from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state from loc_orc_1d @@ -296,12 +298,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: 
_col0 - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -499,12 +501,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state diff --git a/ql/src/test/results/clientpositive/fm-sketch.q.out b/ql/src/test/results/clientpositive/fm-sketch.q.out new file mode 100644 index 0000000000..2bd218b4c8 --- /dev/null +++ b/ql/src/test/results/clientpositive/fm-sketch.q.out @@ -0,0 +1,333 @@ +PREHOOK: query: create table n(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@n +POSTHOOK: query: create table n(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@n +PREHOOK: query: insert overwrite table n select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@n +POSTHOOK: query: insert overwrite table n select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@n +POSTHOOK: Lineage: n.key EXPRESSION [] +PREHOOK: query: explain analyze table n compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table n compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root 
stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: n + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'fm', 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.n + +PREHOOK: query: analyze table n compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@n +#### A masked pattern was here #### +POSTHOOK: query: analyze table n compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@n +#### A masked pattern was here #### +PREHOOK: query: desc formatted n key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@n +POSTHOOK: query: desc formatted n key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@n +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector 
+ +key int 0 0 500 1 Rk0QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: create table i(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain analyze table i compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table i compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: i + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'fm', 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 
Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.i + +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 0 498 0 196 Rk0QAP8YAAB/AAAA/woAAP8AAAC/AQAA/wEAAH8BAAD/AgAAfwAAAPsLAAB/AgAA/wgAAH9DAAA/ +AAAA/xQAAP8DAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: 
insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key double 0.0 498.0 0 234 Rk0QAP8AAAD/AQAA/wAAAJ8NAAB/MAAA/xEAAP8CAAD/AgAAfwIAAP8AAAB/EQAA/wAAAP8AAAB/ +AAAA3wEAAP8CAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key decimal) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute 
statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key decimal(10,0) 0 498 0 180 Rk0QAP8AAAD/AwAA/wUAAP8DAAD/AwAAvwIAAH8eAAC/AQAAPwAAAL8AAAAHAAAAvwAAAP0CAAD/ +AQAA/wMAAH8CAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] 
+PREHOOK: query: insert into i values ('2013-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2013-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-03-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-03-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-05-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-05-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key date 2012-03-17 2013-08-17 0 3 Rk0QAAEAAAAGAAAAAwAAAA0AAAADAAAABwAAAAsAAAAJAAAAEwAAAAkAAAADAAAABwAAAAMAAAAB +AAAABAAAAAUAAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/hll.q.out 
b/ql/src/test/results/clientpositive/hll.q.out index b9357c3043..13da13087e 100644 --- a/ql/src/test/results/clientpositive/hll.q.out +++ b/ql/src/test/results/clientpositive/hll.q.out @@ -1,3 +1,88 @@ +PREHOOK: query: create table n(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@n +POSTHOOK: query: create table n(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@n +PREHOOK: query: insert overwrite table n select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@n +POSTHOOK: query: insert overwrite table n select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@n +POSTHOOK: Lineage: n.key EXPRESSION [] +PREHOOK: query: explain analyze table n compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table n compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: n + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.n + +PREHOOK: query: analyze table n compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@n +#### A masked pattern was here #### +POSTHOOK: query: analyze table n compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@n +#### A masked pattern was here #### +PREHOOK: query: desc formatted n key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@n +POSTHOOK: query: desc formatted n key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@n +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 0 0 500 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: create table i(key int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -78,10 +163,31 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb 
+YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO +vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi +AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe +A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ +we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD +gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl +Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA +t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: drop table i PREHOOK: type: DROPTABLE PREHOOK: Input: default@i @@ -121,10 +227,31 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key double 0.0 498.0 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# 
col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key double 0.0 498.0 0 309 SExM4LUCtQLB60iBzkW/t98FgZmoAoClIL/zvgGA98wFwZxDgNEev/fQAoTVmQS8tMUCgIfzAsap +7wL7vdICwMCHAv/a2gGAj+MJwMSpBsHPLcGehw++nboDgY6bAb+Z3Q2CycMBvtO0A8DE9QOCh+sE +v4/3AoHde7++1QKB1ZUFvvHBAYCumwGAm4sBwOjEAoD4LsDB1APA0rEEwo6RAb665QaCysQBvua+ +BMGxF8C4qASAqtwLhZqTAbqmvAHChlWBuNsJ/fzNAoCjiwSA7f0BgMHaBIDtjQ3B9zLCwa0F/8ba +AYPTogK7xaMGgNT7BIOAQr7ZogH//r4Bwb88xcts+7xwv97zAYPPhAG+mJkBgeabAb6jFcDZmgmA +wWiB6NQG/6y2AcDn7QLB2OMBv7PADMK6K7+obMCrqwrBnPUE/qNQwaacDsCm9gG/wdUCwauuAf/I +yQGCgtkB/r+qAcAgwNvzD8C0GoTuwAS87xqAx7wJwc+QAYDolQL/164BwJ+VA8K4Hf/DjwG//ecB +wNTFEMCugwLA3CyBopEBv4fdAoOhSb6DjQPDjm79gI0B//ixBoCFA4HvpwGAx4sC/7KuD4DKGoPk +qwH9g8YBwI95wMCYAsD0lAHAhViGgt0F+7hQv4bMAsCKEICDmQHE06oD/J1FgJBfgOapA8PJlQf9 +itwEwM2GC4P8pgi997EDgP2aAsCzhgTAj5ADw5HBA77hdb/IL8LNxQT+zzOA4aQBgf+gBf/SWMDl +iAWF/r4BgPwD/KhS/5M+wIP+AcBkgPCBA4CxjAGAwgHEgFz/9vYLvZuZA8CHqgPDjJwBvddOwJua +BMCpUYKEvgS+pVaA9PoEg6osvZ1Gga/IAv/9wQLA94EDwOwigO1tgfadAcO4f7yA/ATAq94BwP2X +A4CahgLA64ECwOzUCoPq5wP9s5cBwNkJgISgAcCN7gLB0bMFgOyLCMDTlA7Bzm2/rYoBgbv8Av+G +esGE3gKAuSu+8YwFxK+9BICqLv70iAq/z1vB2oQDv790gZOXA8DxhQi+3r0Ewe2+AsGpfL7JtgGB +sdgHgt+IAb3riwKA/xqAx4YBwM6BBMD24QeE/sgEvM3RAsD/4QHA9KUBg9/PBr7xxgaB0aUD//aC +A8D0gxSB19wEvtOyCcDBmQGC9q4BvqHgCYDEbMGnaoHK2QT/j5kDv+w7gutQgP3zC/6+kgSAsh3B +xkC/ybsCgYq4Ab+iS8LN2wK/3dUEgMGICMHQ9wK+ucQCgJvyAofd9Ai5wzbC3LcFwrjwAf78jgq+ +xiPBgzO/0myEya8E/OKkAsHYPcHfqQP/ndwCwNH/BcOngAG8/d8Egd5S/+khgr+zEICIJ4bv0AH4 +isQCgN6lAsTolwO88EDA56UEwsSgDf7U4gHDpUa9570DweyNAb/LyQfA/PwGga7MA8Db7QGBpYEB +vqNhwNSNBMCL3AHBqzu/gGXAweUCgIqDAoCBdYLHyAbAaL/rgAWA9e4RgMwTv76yAoDZDcHd1wGA +tucFgd6SE8DhBr+JUQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: drop table i PREHOOK: type: DROPTABLE PREHOOK: Input: default@i @@ -164,10 +291,31 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key 
POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key decimal(10,0) 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key decimal(10,0) 0 498 0 309 SExM4LUCtQLB60iBzkW/t98FgZmoAoClIL/zvgGA98wFwZxDgNEev/fQAoTVmQS8tMUCgIfzAsap +7wL7vdICwMCHAv/a2gGAj+MJwMSpBsHPLcGehw++nboDgY6bAb+Z3Q2CycMBvtO0A8DE9QOCh+sE +v4/3AoHde7++1QKB1ZUFvvHBAYCumwGAm4sBwOjEAoD4LsDB1APA0rEEwo6RAb665QaCysQBvua+ +BMGxF8C4qASAqtwLhZqTAbqmvAHChlWBuNsJ/fzNAoCjiwSA7f0BgMHaBIDtjQ3B9zLCwa0F/8ba +AYPTogK7xaMGgNT7BIOAQr7ZogH//r4Bwb88xcts+7xwv97zAYPPhAG+mJkBgeabAb6jFcDZmgmA +wWiB6NQG/6y2AcDn7QLB2OMBv7PADMK6K7+obMCrqwrBnPUE/qNQwaacDsCm9gG/wdUCwauuAf/I +yQGCgtkB/r+qAcAgwNvzD8C0GoTuwAS87xqAx7wJwc+QAYDolQL/164BwJ+VA8K4Hf/DjwG//ecB +wNTFEMCugwLA3CyBopEBv4fdAoOhSb6DjQPDjm79gI0B//ixBoCFA4HvpwGAx4sC/7KuD4DKGoPk +qwH9g8YBwI95wMCYAsD0lAHAhViGgt0F+7hQv4bMAsCKEICDmQHE06oD/J1FgJBfgOapA8PJlQf9 +itwEwM2GC4P8pgi997EDgP2aAsCzhgTAj5ADw5HBA77hdb/IL8LNxQT+zzOA4aQBgf+gBf/SWMDl +iAWF/r4BgPwD/KhS/5M+wIP+AcBkgPCBA4CxjAGAwgHEgFz/9vYLvZuZA8CHqgPDjJwBvddOwJua +BMCpUYKEvgS+pVaA9PoEg6osvZ1Gga/IAv/9wQLA94EDwOwigO1tgfadAcO4f7yA/ATAq94BwP2X +A4CahgLA64ECwOzUCoPq5wP9s5cBwNkJgISgAcCN7gLB0bMFgOyLCMDTlA7Bzm2/rYoBgbv8Av+G +esGE3gKAuSu+8YwFxK+9BICqLv70iAq/z1vB2oQDv790gZOXA8DxhQi+3r0Ewe2+AsGpfL7JtgGB +sdgHgt+IAb3riwKA/xqAx4YBwM6BBMD24QeE/sgEvM3RAsD/4QHA9KUBg9/PBr7xxgaB0aUD//aC +A8D0gxSB19wEvtOyCcDBmQGC9q4BvqHgCYDEbMGnaoHK2QT/j5kDv+w7gutQgP3zC/6+kgSAsh3B +xkC/ybsCgYq4Ab+iS8LN2wK/3dUEgMGICMHQ9wK+ucQCgJvyAofd9Ai5wzbC3LcFwrjwAf78jgq+ +xiPBgzO/0myEya8E/OKkAsHYPcHfqQP/ndwCwNH/BcOngAG8/d8Egd5S/+khgr+zEICIJ4bv0AH4 +isQCgN6lAsTolwO88EDA56UEwsSgDf7U4gHDpUa9570DweyNAb/LyQfA/PwGga7MA8Db7QGBpYEB 
+vqNhwNSNBMCL3AHBqzu/gGXAweUCgIqDAoCBdYLHyAbAaL/rgAWA9e4RgMwTv76yAoDZDcHd1wGA +tucFgd6SE8DhBr+JUQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: drop table i PREHOOK: type: DROPTABLE PREHOOK: Input: default@i @@ -233,7 +381,8 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key date 2012-03-17 2013-08-17 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key date 2012-03-17 2013-08-17 0 4 SExM4AQEgZ3gM4Gdw13A3/qtA4L855QD + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out index f29f7b5d1a..ce524f9e1a 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out @@ -123,20 +123,62 @@ PREHOOK: Input: default@a POSTHOOK: query: describe formatted a key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@a -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF 
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b key PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: from src insert overwrite table a select 
* insert into table b select * @@ -231,20 +273,62 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr 
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA 
+/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table b select NULL, NULL from src limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -261,20 +345,62 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 10 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 10 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 10 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 10 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF 
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table b(value) select key+100000 from src limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -291,20 +417,63 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 20 309 2.812 3 from 
deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 20 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: 
describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 10 309 8.0 8 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 10 319 SExM4L8CvwLM7SyB+xL1r/4Gw751wOABvMEcgclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoLguAL+ +63GCtK0Dv6qTA/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCd +mAGBuSGAhBmFr5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KX +Av61xQGBgpcBgIo6geaBAf6ArwyBkDD/tkWBqNID/6ilBoDahwbA1fICwcKtBL+I0QHC28MFvvLE +BYC8f4CF8wHCw8AD/tPRAcD+BsK12AKA7scDwcuXAoC1Bf3auAXApKAGgMn3BICZ0QWA/IgCgOL4 +BsC+EoCLhALB56oD/6G7AoDumAKCmFW+1UrAkdgCwIyYAcH0lAKA9zzD/+ECgMMTvOC6AsCg0gHA +2i7B+5ACv7v2AoC39AGB+scBgNXPAYOysQH91nH/vaYCgOF/hbHjA72W/AG+zLUGgNHlA8DlIoCi +DcGVLr/hyQTAlg6CwjX/5ooB/++nA4CU7wOA7qUHwpq3BP/F1AK/jkSBwMANv8TlBMKclQb/s60F +/7SYAcOZ0QO/t50G/oKmAsCP+wLAhXWB/bMGv/DkA8DiZsLx0AL+vckDgtRM/sG5AoCJ4AOB8twG +v+i4AYCzY4CIpQbAl/YKgZDRBr/TmgaA2iOAlNkGgJaTBMC3gQrHms8DuZmpAoHmQIH19wKAtPAH +/o7yBMDG9hLA5pEBgMWAA8D42wKBybwGgLSMBIHd6wL+gOAFgZy+BcO14gLAqgr8xkyChcIBwKXi +A7+ber/dUoGzzgGBj2SAx7IEv/NHwKWcAsKw4gL+za0B/8mxAsCZYID5qQKCurYD//CUC7/bqwTC +3USAqf8D/96OAb+ylgzA4qcKgeqxAcDugwK/ragGgOCEBoDygQKA9KQHgO/6AcH7IcCpDr/FFYGb +0QGE9oUBvL2OA7/VyAyAr2nAy6IDwLn8BoD9/wPCyI4DgKINgN6bBL/NcMPFfr7HhQrCnJYG/ufw +Af6VrgfC8FK/ruoCv524AcDcrwSC8osD/v7KA8D+eYC5A4W+3g27v7gDw9cb/ZlugaEXwrr4Bv7o +iwmB3oAB/5qLAf+pXISEHv6j/gX+s7QBgIx9wLWpA4C65gGBxsoBwPLkAf/C7QGBrpQCv97XA8Ge +xwK/1L4FgMqPA8H8kAGAl5cGgKMBgdPyAv61XoHpE7/0YYHVpQfFwLEB+queAoCUhwOEtckCve9X +/8bkBMGKpAOBwaQBwOKNBL6CwQmBz4UDgM8pgomLAf2GrAHCkkL+wc0BgdK0CYHgigm+x6cGgdhH 
+v/SpB4Gl3gzB2m/+mdUIgY2kAv+5P8DmngeAyIAFgNkGgrBG/raEAsKdyQWAqogEvq7iBIavsAH6 ++kHByq8O/8TAAcTfzQf87TuCuLUBv/eKAYCX+AG/hdgBws2sA8GtgQK+r5MF/7uUBcGH1gLA1YEC +goGcCf2WF8Dc7QKC9/wC/riiAcHXngPDmI8BgaCJCLzEpgP/hReD5z2+mu4CgNQXwOCTBsDm9QG/ +lh4= + 8.0 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: drop table src_multi2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table src_multi2 diff --git a/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out index fb833bccb2..040aa13b7b 100644 --- a/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out +++ b/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out @@ -48,10 +48,10 @@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + + left string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: insert into space values ("1", "2", "3") PREHOOK: type: QUERY PREHOOK: Output: default@space @@ -67,10 +67,11 @@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string 0 
1 1.0 1 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + + left string 0 1 SExM4AEBxbi8+AQ= + 1.0 1 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: select * from space PREHOOK: type: QUERY PREHOOK: Input: default@space diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index 5e647433f1..28e3ad70c0 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -80,36 +80,40 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses 
comment - -value string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# col_name data_type comment + +key int from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -134,36 +138,40 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 27 484 0 20 
SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# col_name data_type comment + +key int from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: 
default@partcolstats @@ -192,54 +200,60 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 495 0 30 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 27 495 0 30 SExM4B4ewv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD8C5hRaHm6ND+b3hCYComCaA+tFngba1 +G7/T4wfAkocbguS2HL+06gTBtfI+/8iBAf/G+AWClaYVvr3WP8H6iQGB35Yz/v9gwYukJIPcgA3+ +6+9ZvuyzPYCwqTo= + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD +CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# 
col_name data_type comment + +key int from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# col_name data_type comment + +key int from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -276,36 +290,52 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 15 495 0 40 from deserializer +# col_name 
data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 15 495 0 40 SExM4Cgowv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD4D4jA/AwfgGh5ujQ/m94QmAqJgmgPrR +Z4G2tRu/0+MHwJKHG4Lkthy/tOoEwfiHI77r2A7C0ZEN/8iBAf/G+AWClaYVvr3WP8H6iQGB35Yz +/v9gwYukJL+9zgrEnrIC/OqkAYDasSKCp5k2vuyzPYDrkw6AxZUsgK/7DYK2uAr/ivcC + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 40 6.825 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 40 SExM4Cgog+SgJr7iywPAp44YwK72BIC/6BaB3skFwLTeHYHwuwH/7/YQ/4KtDMCv7BbC0uQ3vqKP +DsDaLoHowwiErvwK+7OXDMDlIsC54ByB1egd/52dEcGEy1q//tAigKTuBsC/mmXB2LUDhN7rGvuS +1w+Bx7AXv5uoWMHXuTmB2L1lwNTNB77f1iKCuLUBv76cD4KaiBO9o6QHgdygE4DUFw== + 6.825 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 15 495 0 58 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 15 495 0 58 SExM4Do6wv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD4GbYP/crA7AwfgGwMyEQMfOngP5veEJ 
+gJH1GIDAkAyA15IBg8+TPL25xAzCp9gR/smhDYG2tRu/0+MHwJKHG4Lkthy/tOoExJKoGf3l3wm+ +69gOwtGRDf/IgQH/xvgFwsSVEcDQkAS/qJM3/5TDCMH6iQHAwrIawZzkGP7/YMGLpCS/vc4KxJ6y +AvzqpAGA2rEigqeZNr7ssz2AgZ4BgOr1DIaz3wL6kbYpgK/7DcCa2QnCm1//ivcC/5fiIsC10AmC +5uYQvue2GQ== + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 58 6.883333333333334 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 58 SExM4Do6geTIH4KA2Aa+4ssDwKeOGMCu9gSAv+gWgd7JBcC03h2B8LsB/+/2EP+CrQzAr+wWwIKn +HoLQvRm+87oEgK/UCcDaLsH7kALA7LIGhK78CvuzlwzA5SLAueAcgdXoHf+dnRGA+ZUgwPH7M4C0 ++AWB5kC//tAigKTuBoHJvAaAm+4bv91SgP6cQsHYtQO/4s4XxfucA/uS1w+Bx7AXv5uoWMD2mxmB +4Z0gwMaBGv/Zzz7Ct+wM/raEAsKdyQW+39Yigri1Ab/3igGB6vwG/9yUB4KaiBO9o6QHwdeeA8CE +ghCA1Bc= + 6.883333333333334 7 from deserializer PREHOOK: query: drop table partcolstats PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstats @@ -356,9 +386,12 @@ PREHOOK: Input: default@partcolstatsnum POSTHOOK: query: describe formatted partcolstatsnum partition (tint=100, sint=1000, bint=1000000) value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatsnum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD 
+CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatsnum PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatsnum @@ -409,9 +442,12 @@ PREHOOK: Input: default@partcolstatsdec POSTHOOK: query: describe formatted partcolstatsdec partition (decpart='1000.0001') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatsdec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD +CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatsdec PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatsdec @@ -462,9 +498,12 @@ PREHOOK: Input: default@partcolstatschar POSTHOOK: query: describe formatted partcolstatschar partition (varpart='part1', charpart='aaa') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatschar -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD +CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatschar PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatschar diff --git 
a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out index 5db87d97cf..15d7f1c1ee 100644 --- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out @@ -72,10 +72,10 @@ PREHOOK: Input: default@testdeci2 POSTHOOK: query: describe formatted testdeci2 amount POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -amount decimal(10,3) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +amount decimal(10,3) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index 6bc1970ad0..8e4dc4c1be 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -113,72 +113,80 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 0.75 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwMOJoQQ= + 0.75 2 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state 
PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 6 3.0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 6 SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7 + 3.0 3 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid double 1.0 4.0 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 4.0 0 4 SExM4AQEwvmagwOC4fQQ/cXBowKCnueKAg== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid double 1.0 5.0 0 5 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 5.0 0 5 SExM4AUFgoqWCcDvhPoCguH0EP3FwaMCgp7nigI= + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d 
PARTITION(year='2001') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 10 2000 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 2000 0 4 SExM4AQEwtKH1wOJpIYp95+qNYHs8ZgB + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 10 910 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 910 0 4 SExM4AQEwavm2wOC18PyAYDUhBSCqe9l + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -zip int 43201 94087 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY= + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max 
num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -zip int 43201 94087 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY= + from deserializer PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d @@ -414,72 +422,80 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 2 0.5 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 2 SExM4AICgaTbFYDJnvoC + 0.5 1 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 1.25 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwJDuDA== + 1.25 4 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -locid double 1.0 2.0 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 2.0 0 2 SExM4AICwvmagwP/pra0Ag== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid double 1.0 31.0 0 5 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 31.0 0 5 SExM4AUFgoqWCb/8tKEBg9TE6QH9xcGjAoKe54oC + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 1000 1010 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 1010 0 2 SExM4AICwtKH1wOJpIYp + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 1000 2000 0 3 from deserializer +# col_name data_type min max num_nulls 
distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 2000 0 3 SExM4AMDwtKH1wOJpIYp95+qNQ== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -zip int 94086 94087 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 94086 94087 0 2 SExM4AICgaPxmgOAs+SCAw== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -zip int 43201 94087 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY= + from deserializer PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d @@ -786,54 +802,60 @@ PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 2 0.5 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len 
max_col_len num_trues num_falses comment + +state string 0 2 SExM4AICgaTbFYDAoocH + 0.5 1 from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 3.0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDwtmPPYHA90C/kJJj + 3.0 3 from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 2 3 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 2 3 0 2 SExM4AICga/rqgHA0vSOAw== + from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 5 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 5 0 3 SExM4AMDxfO+SLy7rGLA9IJO + from deserializer 
PREHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 1000 2000 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 2000 0 2 SExM4AICy/aNgAT3n6o1 + from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 10 100 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 100 0 2 SExM4AICw4KqzgWC/fN5 + from deserializer PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d diff --git a/ql/src/test/results/clientpositive/llap/llap_smb.q.out b/ql/src/test/results/clientpositive/llap/llap_smb.q.out index 87b33db805..6ece9a1982 100644 --- a/ql/src/test/results/clientpositive/llap/llap_smb.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_smb.q.out @@ -321,7 +321,7 @@ POSTHOOK: Input: default@orc_a@y=2001/q=8 POSTHOOK: Input: default@orc_a@y=2001/q=9 POSTHOOK: Input: default@orc_b #### A masked pattern was here #### -2000 5 52 +2001 8 52 2001 5 139630 PREHOOK: query: DROP TABLE orc_a PREHOOK: type: 
DROPTABLE diff --git a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out index 57aaf557b2..4b3d539a99 100644 --- a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out @@ -413,9 +413,10 @@ PREHOOK: Input: default@stats_null_part POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_null_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a double 1.0 1.0 1 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a double 1.0 1.0 1 1 SExM4AEBwaDRtwU= + from deserializer PREHOOK: query: drop table stats_null PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_null diff --git a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out index 2e9d88e343..023d51ccb8 100644 --- a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out @@ -406,7 +406,7 @@ from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} 
{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} PREHOOK: query: select min(c2), min(c4) diff --git a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 9a164fe130..b8d19c5637 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2713,7 +2713,7 @@ from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} PREHOOK: query: explain vectorization detail select min(c2), diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out index 87d47dae22..452d4b688c 100644 --- a/ql/src/test/results/clientpositive/partial_column_stats.q.out +++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out @@ -69,7 +69,7 @@ PREHOOK: Input: default@t1 POSTHOOK: query: desc formatted t1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len 
max_col_len num_trues num_falses comment - -value string 0 0 0.0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 0 0.0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out index d459b36ff0..d824a9886c 100644 --- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -302,48 +302,52 @@ PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 20 6.766666666666667 7 
from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 20 6.766666666666667 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted partcoltypenum tint PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum tint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -tint tinyint 110 110 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +tint tinyint 110 110 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} PREHOOK: query: describe formatted partcoltypenum sint PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum sint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sint smallint 22000 22000 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"sint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sint smallint 22000 22000 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"sint\":\"true\"}} PREHOOK: query: describe formatted partcoltypenum bint PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum bint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bint bigint 
330000000000 330000000000 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +bint bigint 330000000000 330000000000 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bint\":\"true\"}} PREHOOK: query: alter table partcoltypenum change key key decimal(10,0) PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: default@partcoltypenum @@ -458,10 +462,10 @@ PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum tint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -tint decimal(3,0) 110 110 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +tint decimal(3,0) 110 110 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} PREHOOK: query: show partitions partcoltypenum partition (tint=110BD, sint=22000S, bint=330000000000L) PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@partcoltypenum diff --git a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out index 4bddd3bef8..124a4b489a 100644 --- a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out +++ b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out @@ -22,7 +22,7 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{0}{0}{0}{1}{1}{1}{0}{0}{0}{0}{0}{1}{2}{1}{0}"} 
{"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{0}{0}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}{4}{2}{0}"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{1}{3}{2}{3}{5}{2}{0}{1}{0}{1}{1}{1}{1}{0}{1}"} +{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAABAAAAAQAAAAEAAAACAAAAAgAAAAIAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAEAAAABAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAQ\r\nAAAABAAAAAEAAAA=\r\n"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAACAAAACAAAAAQAAAAIAAAAIAAAAAQAAAABAAAAAgAAAAEAAAACAAAAAgAAAAIAAAAC\r\nAAAAAQAAAAIAAAA=\r\n"} PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( @@ -111,7 +111,7 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} 
{"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"} +{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( @@ -217,4 +217,4 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} 
{"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"} +{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out index 19546c38bc..893aea338a 100644 --- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out +++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out @@ -162,18 +162,20 @@ PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 9 0 6 from deserializer +# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 0 9 0 6 SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE= + from deserializer PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 6 5.0 5 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 6 5.0 5 SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs= + from deserializer PREHOOK: query: ALTER TABLE ex_table PARTITION (part='part1') RENAME TO PARTITION (part='part2') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: default@ex_table @@ -310,15 +312,17 @@ PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 9 0 6 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key int 0 9 0 6 SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE= + from deserializer PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 6 5.0 5 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment bitVector + +value string 0 6 5.0 5 SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs= + from deserializer diff --git a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out index 16b3a38c46..ae6fa400a6 100644 --- a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out +++ b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out @@ -57,30 +57,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 
SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -96,30 +99,33 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: 
query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default @@ -203,30 +209,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max 
num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# 
col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -242,30 +251,33 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from 
deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col3 string 0 1 4.0 4 SExM4AEBgeL8+wM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default diff --git a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out index b73b5f5679..eafec9aa67 100644 --- a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out @@ -36,10 +36,10 @@ PREHOOK: Input: default@dec POSTHOOK: query: DESC FORMATTED `dec` value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value decimal(8,4) -12.25 234.79 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value decimal(8,4) -12.25 234.79 0 10 SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui from deserializer 
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out index 359eea3acb..7cb2d64a8f 100644 --- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out @@ -389,9 +389,9 @@ PREHOOK: Input: default@stats_null_part POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_null_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a double 1.0 1.0 1 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a double 1.0 1.0 1 1 SExM4AEBwaDRtwU= from deserializer PREHOOK: query: drop table stats_null PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_null diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index 88c2114356..de1b017989 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -377,9 +377,10 @@ PREHOOK: Input: default@stats_null_part POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_null_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a double 1.0 1.0 1 1 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a double 1.0 1.0 1 1 SExM4AEBwaDRtwU= + from deserializer PREHOOK: query: drop table stats_null PREHOOK: type: 
DROPTABLE PREHOOK: Input: default@stats_null diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index ad92058cab..8d94ac6b07 100644 --- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -55,9 +55,9 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string from deserializer PREHOOK: query: explain analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -242,27 +242,36 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string 0 55 12.763636363636364 13 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB +wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + from deserializer PREHOOK: query: desc formatted 
UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -avgTimeOnSite int 1 9 0 9 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b +Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from deserializer PREHOOK: query: CREATE TEMPORARY TABLE empty_tab( a int, b double, @@ -289,10 +298,10 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int from deserializer 
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: explain analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY @@ -358,20 +367,20 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 0 0 0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a int 0 0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: desc formatted empty_tab b PREHOOK: type: DESCTABLE PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b double 0.0 0.0 0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b double 0.0 0.0 0 0 from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: CREATE DATABASE test PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:test @@ -447,27 +456,31 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string from deserializer PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string from deserializer PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sourceIP string 0 55 12.763636363636364 13 
SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB +wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + from deserializer PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none @@ -489,15 +502,23 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sKeyword string 0 54 7.872727272727273 19 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + from deserializer PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +sKeyword string 0 54 7.872727272727273 19 
SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + from deserializer diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 626e1fd4d0..1764164a91 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -296,14 +296,14 @@ Stage-3 Reducer 2 File Output Operator [FS_8] table:{"name:":"default.acid_uami"} - Select Operator [SEL_4] (rows=8/2 width=302) + Select Operator [SEL_4] (rows=4/2 width=302) Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_3] PartitionCols:UDFToInteger(_col0) - Select Operator [SEL_2] (rows=8/2 width=302) + Select Operator [SEL_2] (rows=4/2 width=302) Output:["_col0","_col1","_col3"] - Filter Operator [FIL_9] (rows=8/2 width=226) + Filter Operator [FIL_9] (rows=4/2 width=226) predicate:((de = 109.23) or (de = 119.23)) TableScan [TS_0] (rows=8/4 width=226) default@acid_uami,acid_uami, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["i","de","vc"] diff --git a/ql/src/test/results/clientpositive/tunable_ndv.q.out b/ql/src/test/results/clientpositive/tunable_ndv.q.out index 437beafc0d..e08f452e4b 100644 --- a/ql/src/test/results/clientpositive/tunable_ndv.q.out +++ b/ql/src/test/results/clientpositive/tunable_ndv.q.out @@ -73,48 +73,53 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d partition(year=2000) locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 2 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment bitVector + +locid int 1 2 0 2 SExM4AICxfO+SPyNofED + from deserializer PREHOOK: query: describe formatted loc_orc_1d partition(year=2001) locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d partition(year=2001) locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +locid int 1 4 0 4 SExM4AQExfO+SLy7rGKA4vdMwPD8wQI= + from deserializer PREHOOK: query: describe formatted loc_orc_1d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +locid int 1 4 0 4 SExM4AICxfO+SPyNofED + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_1d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +locid int 1 4 0 4 
SExM4AICxfO+SPyNofED + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_1d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +locid int 1 4 0 4 SExM4AICxfO+SPyNofED + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: create table if not exists loc_orc_2d ( state string, locid int @@ -194,27 +199,30 @@ PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 3 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +locid int 1 4 0 4 SExM4AEBwYHguQQ= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_2d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len 
max_col_len num_trues num_falses comment bitVector + +locid int 1 4 0 4 SExM4AEBwYHguQQ= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_2d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +locid int 1 4 0 4 SExM4AEBwYHguQQ= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}