diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java index e810ac5487..457d2f5059 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java @@ -19,6 +19,14 @@ package org.apache.hadoop.hive.common.ndv; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.hive.common.ndv.fm.FMSketch; +import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils; import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; public class NumDistinctValueEstimatorFactory { @@ -26,11 +34,24 @@ private NumDistinctValueEstimatorFactory() { } + private static boolean isFMSketch(String s) throws IOException { + InputStream in = new ByteArrayInputStream(Base64.decodeBase64(s)); + byte[] magic = new byte[2]; + magic[0] = (byte) in.read(); + magic[1] = (byte) in.read(); + return Arrays.equals(magic, FMSketchUtils.MAGIC); + } + public static NumDistinctValueEstimator getNumDistinctValueEstimator(String s) { - if (s.startsWith("{")) { - return new FMSketch(s); - } else { - return HyperLogLog.builder().build().deserialize(s); + // Right now we assume only FM and HLL are available. 
+ try { + if (isFMSketch(s)) { + return FMSketchUtils.deserializeFM(s); + } else { + return HyperLogLog.builder().build().deserialize(s); + } + } catch (IOException e) { + throw new RuntimeException(e); } } diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java similarity index 81% rename from common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java rename to common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java index e20d29954a..7f48a8aaaa 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java @@ -15,22 +15,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.common.ndv; - +package org.apache.hadoop.hive.common.ndv.fm; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; import java.util.Random; +import javolution.text.Text; import javolution.util.FastBitSet; +import javolution.util.FastCollection.Record; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.util.JavaDataModel; public class FMSketch implements NumDistinctValueEstimator{ static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName()); + public static 
final byte[] MAGIC = new byte[] { 'F', 'M' }; /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. @@ -38,7 +52,7 @@ * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 * thus introducing errors in the estimates. */ - private static final int BIT_VECTOR_SIZE = 31; + public static final int BIT_VECTOR_SIZE = 31; // Refer to Flajolet-Martin'86 for the value of phi private static final double PHI = 0.77351; @@ -111,27 +125,6 @@ public FMSketch(int numBitVectors) { } } - public FMSketch(String s, int numBitVectors) { - this.numBitVectors = numBitVectors; - FastBitSet bitVectorDeser[] = genBitSet(s, numBitVectors); - bitVector = new FastBitSet[numBitVectors]; - for(int i=0; i = '0' && c <= '9') { - String t = new String(); - t = t + c; - c = s.charAt(i); - i = i + 1; - - while (c != ',' && c!= '}') { - t = t + c; - c = s.charAt(i); - i = i + 1; - } - - int bitIndex = Integer.parseInt(t); - assert(bitIndex >= 0); - assert(vectorIndex < numBitVectors); - b[vectorIndex].set(bitIndex); - if (c == '}') { - vectorIndex = vectorIndex + 1; - } - } + + @Override + public NumDistinctValueEstimator deserialize(String s) { + InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); + try { + return FMSketchUtils.deserializeFM(is); + } catch (IOException e) { + throw new RuntimeException(e); } - return b; } - + private int generateHash(long v, int hashNum) { int mod = (1<4 byte header is encoded like below 2 bytes - FM magic string to + * identify serialized stream 2 bytes - numbitvectors because + * BIT_VECTOR_SIZE=31, 4 bytes are enough to hold positions of 0-31 + */ + public static void serializeFM(OutputStream out, FMSketch fm) throws IOException { + out.write(MAGIC); + + // max of numBitVectors = 1024, 2 bytes is enough. 
+ byte[] nbv = new byte[2]; + nbv[0] = (byte) fm.getnumBitVectors(); + nbv[1] = (byte) (fm.getnumBitVectors() >>> 8); + + out.write(nbv); + + // original toString takes too much space + // we compress a fastbitset to 4 bytes + for (int i = 0; i < fm.getnumBitVectors(); i++) { + writeBitVector(out, fm.getBitVector(i)); + } + } + + private static void writeBitVector(OutputStream out, FastBitSet bit) throws IOException { + int num = 0; + for (int pos = 0; pos < FMSketch.BIT_VECTOR_SIZE; pos++) { + if (bit.get(pos)) { + num |= 1 << pos; + } + } + byte[] i = new byte[4]; + for (int j = 0; j < 4; j++) { + i[j] = (byte) ((num >>> (8 * j)) & 0xff); + } + out.write(i); + } + + /* + * Deserializes from string to FastBitSet; Creates a NumDistinctValueEstimator + * object and returns it. + */ + public static FMSketch deserializeFM(String s) throws IOException { + InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); + try { + return deserializeFM(is); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public static FMSketch deserializeFM(InputStream in) throws IOException { + checkMagicString(in); + + byte[] nbv = new byte[2]; + nbv[0] = (byte) in.read(); + nbv[1] = (byte) in.read(); + + int numBitVectors = 0; + numBitVectors |= (nbv[0] & 0xff); + numBitVectors |= ((nbv[1] & 0xff) << 8); + + FMSketch sketch = new FMSketch(numBitVectors); + for (int n = 0; n < numBitVectors; n++) { + sketch.setBitVector(readBitVector(in), n); + } + return sketch; + } + + private static FastBitSet readBitVector(InputStream in) throws IOException { + FastBitSet fastBitSet = new FastBitSet(); + fastBitSet.clear(); + for (int i = 0; i < 4; i++) { + byte b = (byte) in.read(); + for (int j = 0; j < 8; j++) { + if ((b & (1 << j)) != 0) { + fastBitSet.set(j + 8 * i); + } + } + } + return fastBitSet; + } + + private static void checkMagicString(InputStream in) throws IOException { + byte[] magic = new byte[2]; + magic[0] = (byte) in.read(); + magic[1] = (byte) 
in.read(); + + if (!Arrays.equals(magic, MAGIC)) { + throw new IllegalArgumentException("The input stream is not a FMSketch stream."); + } + } +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java index d1955468a6..a700e846fc 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java @@ -277,7 +277,9 @@ public void add(long hashcode) { } public long estimateNumDistinctValues() { - return count(); + // FMSketch treats the ndv of all nulls as 1 but hll treats the ndv as 0. + // In order to get rid of the divide by 0 problem, we follow FMSketch + return count() > 0 ? count() : 1; } public long count() { diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java new file mode 100644 index 0000000000..74fdf58d2d --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.common.ndv.fm; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import javolution.util.FastBitSet; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.junit.Test; + +public class TestFMSketchSerialization { + + private FastBitSet[] deserialize(String s, int numBitVectors) { + FastBitSet[] b = new FastBitSet[numBitVectors]; + for (int j = 0; j < numBitVectors; j++) { + b[j] = new FastBitSet(FMSketch.BIT_VECTOR_SIZE); + b[j].clear(); + } + + int vectorIndex = 0; + + /* + * Parse input string to obtain the indexes that are set in the bitvector. + * When a toString() is called on a FastBitSet object to serialize it, the + * serialization adds { and } to the beginning and end of the return String. + * Skip "{", "}", ",", " " in the input string. + */ + for (int i = 1; i < s.length() - 1;) { + char c = s.charAt(i); + i = i + 1; + + // Move on to the next bit vector + if (c == '}') { + vectorIndex = vectorIndex + 1; + } + + // Encountered a numeric value; Extract out the entire number + if (c >= '0' && c <= '9') { + String t = new String(); + t = t + c; + c = s.charAt(i); + i = i + 1; + + while (c != ',' && c != '}') { + t = t + c; + c = s.charAt(i); + i = i + 1; + } + + int bitIndex = Integer.parseInt(t); + assert (bitIndex >= 0); + assert (vectorIndex < numBitVectors); + b[vectorIndex].set(bitIndex); + if (c == '}') { + vectorIndex = vectorIndex + 1; + } + } + } + return b; + } + + @Test + public void testSerDe() throws IOException { + String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}"; + FastBitSet[] fastBitSet = deserialize(bitVectors, 16); + FMSketch sketch = new FMSketch(16); + for (int i = 0; i < 16; i++) { + sketch.setBitVector(fastBitSet[i], i); + } + assertEquals(sketch.estimateNumDistinctValues(), 3); + String s = sketch.serialize(); + FMSketch newSketch = 
(FMSketch) NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(s); + sketch.equals(newSketch); + assertEquals(newSketch.estimateNumDistinctValues(), 3); + assertEquals(newSketch.serialize(), s); + } + +} \ No newline at end of file diff --git a/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql new file mode 100644 index 0000000000..5819e2ca6e --- /dev/null +++ b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql @@ -0,0 +1 @@ +ALTER TABLE "APP"."PART_COL_STATS" ADD COLUMN "BIT_VECTOR" VARCHAR(16400); diff --git a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql index a9a532906f..edd9decd90 100644 --- a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql +++ b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql @@ -94,7 +94,7 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767)); -CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL); +CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" 
DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "BIT_VECTOR" VARCHAR(16400), "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL); CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255)); diff --git a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql index 30513dc882..01b6f908f5 100644 --- a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql +++ b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql @@ -2,5 +2,6 @@ RUN '041-HIVE-16556.derby.sql'; RUN '042-HIVE-16575.derby.sql'; RUN '043-HIVE-16922.derby.sql'; +RUN '044-HIVE-16997.derby.sql'; UPDATE "APP".VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1; diff --git a/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql new file mode 100644 index 0000000000..fc2a6e0f2e --- /dev/null +++ b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql @@ -0,0 +1 @@ +ALTER TABLE PART_COL_STATS ADD BIT_VECTOR nvarchar(16400); diff --git a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql index 1cfe2d1b2d..498d089262 100644 --- a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql +++ b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql @@ -87,6 +87,7 @@ CREATE TABLE PART_COL_STATS LONG_LOW_VALUE bigint NULL, MAX_COL_LEN bigint NULL, NUM_DISTINCTS bigint NULL, + BIT_VECTOR nvarchar(16400) NULL, NUM_FALSES bigint NULL, NUM_NULLS bigint NOT NULL, NUM_TRUES bigint NULL, diff --git 
a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql index 5683254b04..21d62ae470 100644 --- a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql +++ b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql @@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE; :r 026-HIVE-16556.mssql.sql :r 027-HIVE-16575.mssql.sql :r 028-HIVE-16922.mssql.sql +:r 029-HIVE-16997.mssql.sql UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1; SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE; diff --git a/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql new file mode 100644 index 0000000000..6b41c77c15 --- /dev/null +++ b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql @@ -0,0 +1 @@ +ALTER TABLE PART_COL_STATS ADD COLUMN BIT_VECTOR VARCHAR(16400); diff --git a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql index 97d881f263..89991efdde 100644 --- a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql +++ b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql @@ -690,6 +690,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` ( `BIG_DECIMAL_HIGH_VALUE` varchar(4000) CHARACTER SET latin1 COLLATE latin1_bin, `NUM_NULLS` bigint(20) NOT NULL, `NUM_DISTINCTS` bigint(20), + `BIT_VECTOR` varchar(16400) CHARACTER SET latin1 COLLATE latin1_bin, `AVG_COL_LEN` double(53,4), `MAX_COL_LEN` bigint(20), `NUM_TRUES` bigint(20), diff --git a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql index ba62939809..9cd3a62663 100644 --- a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql +++ 
b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql @@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' '; SOURCE 041-HIVE-16556.mysql.sql; SOURCE 042-HIVE-16575.mysql.sql; SOURCE 043-HIVE-16922.mysql.sql; +SOURCE 044-HIVE-16997.mysql.sql; UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1; SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' '; diff --git a/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql new file mode 100644 index 0000000000..0ee529fa77 --- /dev/null +++ b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql @@ -0,0 +1 @@ +ALTER TABLE PART_COL_STATS ADD BIT_VECTOR VARCHAR2(16400) NULL; diff --git a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql index 8fdb552367..b636673e68 100644 --- a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql +++ b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql @@ -515,6 +515,7 @@ CREATE TABLE PART_COL_STATS ( BIG_DECIMAL_HIGH_VALUE VARCHAR2(4000), NUM_NULLS NUMBER NOT NULL, NUM_DISTINCTS NUMBER, + BIT_VECTOR VARCHAR2(16400), AVG_COL_LEN NUMBER, MAX_COL_LEN NUMBER, NUM_TRUES NUMBER, diff --git a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql index 0a70d47cca..6a266498b5 100644 --- a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql +++ b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql @@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status from dual; @041-HIVE-16556.oracle.sql; @042-HIVE-16575.oracle.sql; @043-HIVE-16922.oracle.sql; +@044-HIVE-16997.oracle.sql; UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1; SELECT 'Finished 
upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status from dual; diff --git a/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql new file mode 100644 index 0000000000..86003cf2fa --- /dev/null +++ b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql @@ -0,0 +1 @@ +ALTER TABLE "PART_COL_STATS" ADD COLUMN "BIT_VECTOR" VARCHAR(16400); diff --git a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql index 1cdeb6b45a..eda2e40c71 100644 --- a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql +++ b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql @@ -534,6 +534,7 @@ CREATE TABLE "PART_COL_STATS" ( "BIG_DECIMAL_HIGH_VALUE" character varying(4000) DEFAULT NULL::character varying, "NUM_NULLS" bigint NOT NULL, "NUM_DISTINCTS" bigint, + "BIT_VECTOR" character varying(16400) DEFAULT NULL::character varying, "AVG_COL_LEN" double precision, "MAX_COL_LEN" bigint, "NUM_TRUES" bigint, diff --git a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql index c44dd067fc..ee5a673a72 100644 --- a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql +++ b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql @@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0'; \i 040-HIVE-16556.postgres.sql; \i 041-HIVE-16575.postgres.sql; \i 042-HIVE-16922.postgres.sql; +\i 043-HIVE-16997.postgres.sql; UPDATE "VERSION" SET "SCHEMA_VERSION"='3.0.0', "VERSION_COMMENT"='Hive release version 3.0.0' where "VER_ID"=1; SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0'; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java deleted 
file mode 100644 index d0569fb8d8..0000000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.metastore; - -import java.util.HashMap; -import java.util.Map; - -public interface IExtrapolatePartStatus { - /** - * The sequence of colStatNames. - */ - static String[] colStatNames = new String[] { "LONG_LOW_VALUE", "LONG_HIGH_VALUE", - "DOUBLE_LOW_VALUE", "DOUBLE_HIGH_VALUE", "BIG_DECIMAL_LOW_VALUE", "BIG_DECIMAL_HIGH_VALUE", - "NUM_NULLS", "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES", - "AVG_NDV_LONG", "AVG_NDV_DOUBLE", "AVG_NDV_DECIMAL", "SUM_NUM_DISTINCTS" }; - - /** - * The indexes for colstats. 
- */ - static HashMap indexMaps = new HashMap() { - { - put("bigint", new Integer[] { 0, 1, 6, 7, 12, 15 }); - put("int", new Integer[] { 0, 1, 6, 7, 12, 15 }); - put("smallint", new Integer[] { 0, 1, 6, 7, 12, 15 }); - put("tinyint", new Integer[] { 0, 1, 6, 7, 12, 15 }); - put("date", new Integer[] { 0, 1, 6, 7, 12, 15 }); - put("timestamp", new Integer[] { 0, 1, 6, 7, 12, 15 }); - put("long", new Integer[] { 0, 1, 6, 7, 12, 15 }); - put("double", new Integer[] { 2, 3, 6, 7, 13, 15 }); - put("float", new Integer[] { 2, 3, 6, 7, 13, 15 }); - put("varchar", new Integer[] { 8, 9, 6, 7, 15 }); - put("char", new Integer[] { 8, 9, 6, 7, 15 }); - put("string", new Integer[] { 8, 9, 6, 7, 15 }); - put("boolean", new Integer[] { 10, 11, 6, 15 }); - put("binary", new Integer[] { 8, 9, 6, 15 }); - put("decimal", new Integer[] { 4, 5, 6, 7, 14, 15 }); - put("default", new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15 }); - } - }; - - /** - * The sequence of colStatTypes. - */ - static enum ColStatType { - Long, Double, Decimal - } - - static ColStatType[] colStatTypes = new ColStatType[] { ColStatType.Long, ColStatType.Long, - ColStatType.Double, ColStatType.Double, ColStatType.Decimal, ColStatType.Decimal, - ColStatType.Long, ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long, - ColStatType.Long, ColStatType.Double, ColStatType.Double, ColStatType.Double, - ColStatType.Long }; - - /** - * The sequence of aggregation function on colStats. 
- */ - static enum AggrType { - Min, Max, Sum, Avg - } - - static AggrType[] aggrTypes = new AggrType[] { AggrType.Min, AggrType.Max, AggrType.Min, - AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Sum, AggrType.Max, AggrType.Max, - AggrType.Max, AggrType.Sum, AggrType.Sum, AggrType.Avg, AggrType.Avg, AggrType.Avg, - AggrType.Sum }; - - public Object extrapolate(Object[] min, Object[] max, int colStatIndex, - Map indexMap); - -} diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java deleted file mode 100644 index f4e5ef7045..0000000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.metastore; - -import java.math.BigDecimal; -import java.util.Map; - -public class LinearExtrapolatePartStatus implements IExtrapolatePartStatus { - - @Override - public Object extrapolate(Object[] min, Object[] max, int colStatIndex, - Map indexMap) { - int rightBorderInd = indexMap.size() - 1; - int minInd = indexMap.get((String) min[1]); - int maxInd = indexMap.get((String) max[1]); - if (minInd == maxInd) { - return min[0]; - } - //note that recent metastore stores decimal in string. - double decimalmin= 0; - double decimalmax = 0; - if (colStatTypes[colStatIndex] == ColStatType.Decimal) { - BigDecimal bdmin = new BigDecimal(min[0].toString()); - decimalmin = bdmin.doubleValue(); - BigDecimal bdmax = new BigDecimal(max[0].toString()); - decimalmax = bdmax.doubleValue(); - } - if (aggrTypes[colStatIndex] == AggrType.Max) { - if (minInd < maxInd) { - // right border is the max - if (colStatTypes[colStatIndex] == ColStatType.Long) { - return (Long) ((Long) min[0] + (((Long) max[0] - (Long) min[0]) - * (rightBorderInd - minInd) / (maxInd - minInd))); - } else if (colStatTypes[colStatIndex] == ColStatType.Double) { - return (Double) ((Double) min[0] + (((Double) max[0] - (Double) min[0]) - * (rightBorderInd - minInd) / (maxInd - minInd))); - } else { - double ret = decimalmin + (decimalmax - decimalmin) - * (rightBorderInd - minInd) / (maxInd - minInd); - return String.valueOf(ret); - } - } else { - // left border is the max - if (colStatTypes[colStatIndex] == ColStatType.Long) { - return (Long) ((Long) min[0] + ((Long) max[0] - (Long) min[0]) - * minInd / (minInd - maxInd)); - } else if (colStatTypes[colStatIndex] == ColStatType.Double) { - return (Double) ((Double) min[0] + ((Double) max[0] - (Double) min[0]) - * minInd / (minInd - maxInd)); - } else { - double ret = decimalmin + (decimalmax - decimalmin) * minInd - / (minInd - maxInd); - return String.valueOf(ret); - } - } - } else { - if (minInd < maxInd) { - // left border 
is the min - if (colStatTypes[colStatIndex] == ColStatType.Long) { - Long ret = (Long) max[0] - ((Long) max[0] - (Long) min[0]) * maxInd - / (maxInd - minInd); - return ret; - } else if (colStatTypes[colStatIndex] == ColStatType.Double) { - Double ret = (Double) max[0] - ((Double) max[0] - (Double) min[0]) - * maxInd / (maxInd - minInd); - return ret; - } else { - double ret = decimalmax - (decimalmax - decimalmin) * maxInd - / (maxInd - minInd); - return String.valueOf(ret); - } - } else { - // right border is the min - if (colStatTypes[colStatIndex] == ColStatType.Long) { - Long ret = (Long) max[0] - ((Long) max[0] - (Long) min[0]) - * (rightBorderInd - maxInd) / (minInd - maxInd); - return ret; - } else if (colStatTypes[colStatIndex] == ColStatType.Double) { - Double ret = (Double) max[0] - ((Double) max[0] - (Double) min[0]) - * (rightBorderInd - maxInd) / (minInd - maxInd); - return ret; - } else { - double ret = decimalmax - (decimalmax - decimalmin) - * (rightBorderInd - maxInd) / (minInd - maxInd); - return String.valueOf(ret); - } - } - } - } -} diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index a960b2d26b..07f3cffa93 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -33,6 +33,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.TreeMap; import javax.jdo.PersistenceManager; @@ -64,6 +65,8 @@ import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; import 
org.apache.hadoop.hive.metastore.model.MConstraint; import org.apache.hadoop.hive.metastore.model.MDatabase; import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics; @@ -1447,291 +1450,45 @@ private long partsFoundForPartitions(final String dbName, final String tableName private List columnStatisticsObjForPartitionsBatch(String dbName, String tableName, List partNames, List colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException { - // TODO: all the extrapolation logic should be moved out of this class, - // only mechanical data retrieval should remain here. - String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", " - + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), " - + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), " - + "sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " - + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), " - // The following data is used to compute a partitioned table's NDV based - // on partitions' NDV when useDensityFunctionForNDVEstimation = true. Global NDVs cannot be - // accurately derived from partition NDVs, because the domain of column value two partitions - // can overlap. If there is no overlap then global NDV is just the sum - // of partition NDVs (UpperBound). But if there is some overlay then - // global NDV can be anywhere between sum of partition NDVs (no overlap) - // and same as one of the partition NDV (domain of column value in all other - // partitions is subset of the domain value in one of the partition) - // (LowerBound).But under uniform distribution, we can roughly estimate the global - // NDV by leveraging the min/max values. 
- // And, we also guarantee that the estimation makes sense by comparing it to the - // UpperBound (calculated by "sum(\"NUM_DISTINCTS\")") - // and LowerBound (calculated by "max(\"NUM_DISTINCTS\")") - + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," - + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," - + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")," - + "sum(\"NUM_DISTINCTS\")" + " from " + PART_COL_STATS + "" - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "; - String queryText = null; - long start = 0; - long end = 0; - Query query = null; - boolean doTrace = LOG.isDebugEnabled(); - Object qResult = null; - ForwardQueryResult fqr = null; - // Check if the status of all the columns of all the partitions exists - // Extrapolation is not needed. - if (areAllPartsFound) { - queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" - + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; - start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), - queryText); - if (qResult == null) { - query.closeAll(); - return Collections.emptyList(); - } - end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, queryText, start, end); - List list = ensureList(qResult); - List colStats = new ArrayList(list.size()); - for (Object[] row : list) { - colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner)); - Deadline.checkTimeout(); - } - query.closeAll(); - return colStats; - } else { - // Extrapolation is needed for some columns. - // In this case, at least a column status for a partition is missing. 
- // We need to extrapolate this partition based on the other partitions - List colStats = new ArrayList(colNames.size()); - queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " - + " from " + PART_COL_STATS - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " - + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" - + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; - start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), - queryText); - end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, queryText, start, end); - if (qResult == null) { - query.closeAll(); - return Collections.emptyList(); - } - List noExtraColumnNames = new ArrayList(); - Map extraColumnNameTypeParts = new HashMap(); - List list = ensureList(qResult); - for (Object[] row : list) { - String colName = (String) row[0]; - String colType = (String) row[1]; - // Extrapolation is not needed for this column if - // count(\"PARTITION_NAME\")==partNames.size() - // Or, extrapolation is not possible for this column if - // count(\"PARTITION_NAME\")<2 - Long count = extractSqlLong(row[2]); - if (count == partNames.size() || count < 2) { - noExtraColumnNames.add(colName); - } else { - extraColumnNameTypeParts.put(colName, new String[] { colType, String.valueOf(count) }); - } - Deadline.checkTimeout(); - } - query.closeAll(); - // Extrapolation is not needed for columns noExtraColumnNames - if (noExtraColumnNames.size() != 0) { - queryText = commonPrefix + " and \"COLUMN_NAME\" in (" - + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in (" - + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; - start = doTrace ? 
System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, - prepareParams(dbName, tableName, partNames, noExtraColumnNames), queryText); - if (qResult == null) { - query.closeAll(); - return Collections.emptyList(); - } - list = ensureList(qResult); - for (Object[] row : list) { - colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner)); - Deadline.checkTimeout(); - } - end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, queryText, start, end); - query.closeAll(); - } - // Extrapolation is needed for extraColumnNames. - // give a sequence number for all the partitions - if (extraColumnNameTypeParts.size() != 0) { - Map indexMap = new HashMap(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - // get sum for all columns to reduce the number of queries - Map> sumMap = new HashMap>(); - queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" - + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " - + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) - + ") and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) - + ") group by \"COLUMN_NAME\""; - start = doTrace ? 
System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); - List extraColumnNames = new ArrayList(); - extraColumnNames.addAll(extraColumnNameTypeParts.keySet()); - qResult = executeWithArray(query, - prepareParams(dbName, tableName, partNames, extraColumnNames), queryText); - if (qResult == null) { - query.closeAll(); - return Collections.emptyList(); - } - list = ensureList(qResult); - // see the indexes for colstats in IExtrapolatePartStatus - Integer[] sumIndex = new Integer[] { 6, 10, 11, 15 }; - for (Object[] row : list) { - Map indexToObject = new HashMap(); - for (int ind = 1; ind < row.length; ind++) { - indexToObject.put(sumIndex[ind - 1], row[ind]); - } - // row[0] is the column name - sumMap.put((String) row[0], indexToObject); - Deadline.checkTimeout(); - } - end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, queryText, start, end); - query.closeAll(); - for (Map.Entry entry : extraColumnNameTypeParts.entrySet()) { - Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2]; - String colName = entry.getKey(); - String colType = entry.getValue()[0]; - Long sumVal = Long.parseLong(entry.getValue()[1]); - // fill in colname - row[0] = colName; - // fill in coltype - row[1] = colType; - // use linear extrapolation. more complicated one can be added in the - // future. - IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus(); - // fill in colstatus - Integer[] index = null; - boolean decimal = false; - if (colType.toLowerCase().startsWith("decimal")) { - index = IExtrapolatePartStatus.indexMaps.get("decimal"); - decimal = true; - } else { - index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase()); - } - // if the colType is not the known type, long, double, etc, then get - // all index. 
- if (index == null) { - index = IExtrapolatePartStatus.indexMaps.get("default"); - } - for (int colStatIndex : index) { - String colStatName = IExtrapolatePartStatus.colStatNames[colStatIndex]; - // if the aggregation type is sum, we do a scale-up - if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Sum) { - Object o = sumMap.get(colName).get(colStatIndex); - if (o == null) { - row[2 + colStatIndex] = null; - } else { - Long val = extractSqlLong(o); - row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size())); - } - } else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min - || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) { - // if the aggregation type is min/max, we extrapolate from the - // left/right borders - if (!decimal) { - queryText = "select \"" + colStatName - + "\",\"PARTITION_NAME\" from " + PART_COL_STATS - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" - + " order by \"" + colStatName + "\""; - } else { - queryText = "select \"" + colStatName - + "\",\"PARTITION_NAME\" from " + PART_COL_STATS - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" - + " order by cast(\"" + colStatName + "\" as decimal)"; - } - start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, - prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText); - if (qResult == null) { - query.closeAll(); - return Collections.emptyList(); - } - fqr = (ForwardQueryResult) qResult; - Object[] min = (Object[]) (fqr.get(0)); - Object[] max = (Object[]) (fqr.get(fqr.size() - 1)); - end = doTrace ? 
System.nanoTime() : 0; - timingTrace(doTrace, queryText, start, end); - query.closeAll(); - if (min[0] == null || max[0] == null) { - row[2 + colStatIndex] = null; - } else { - row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex, - indexMap); - } - } else { - // if the aggregation type is avg, we use the average on the existing ones. - queryText = "select " - + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," - + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," - + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" - + " from " + PART_COL_STATS + "" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" - + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" - + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\""; - start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, - prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText); - if (qResult == null) { - query.closeAll(); - return Collections.emptyList(); - } - fqr = (ForwardQueryResult) qResult; - Object[] avg = (Object[]) (fqr.get(0)); - // colStatIndex=12,13,14 respond to "AVG_LONG", "AVG_DOUBLE", - // "AVG_DECIMAL" - row[2 + colStatIndex] = avg[colStatIndex - 12]; - end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, queryText, start, end); - query.closeAll(); - } - } - colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner)); - Deadline.checkTimeout(); + List colStats = new ArrayList<>(); + // 1. get all the stats for colNames in partNames; + List list = getPartitionStats(dbName, tableName, partNames, colNames); + // 2. 
group by the stats by colNames + // map the colName to List + Map> map = new HashMap<>(); + for (ColumnStatistics css : list) { + List objs = css.getStatsObj(); + for (ColumnStatisticsObj obj : objs) { + List singleObj = new ArrayList<>(); + singleObj.add(obj); + ColumnStatistics singleCS = new ColumnStatistics(css.getStatsDesc(), singleObj); + if (!map.containsKey(obj.getColName())) { + map.put(obj.getColName(), new ArrayList()); } + map.get(obj.getColName()).add(singleCS); } - return colStats; } + // 3. aggr stats for each colName + // TODO: thread pool can be used to speed up the process + for (Entry> entry : map.entrySet()) { + List css = entry.getValue(); + ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css + .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(), + useDensityFunctionForNDVEstimation, ndvTuner); + ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css); + colStats.add(statsObj); + } + return colStats; } private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaException { ColumnStatisticsData data = new ColumnStatisticsData(); ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data); Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++], - declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], + declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], bitVector = row[i++], avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++]; StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, - llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses); - return cso; - } - - private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i, - boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException { - ColumnStatisticsData data = new ColumnStatisticsData(); - 
ColumnStatisticsObj cso = new ColumnStatisticsObj((String) row[i++], (String) row[i++], data); - Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++], declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++], avgLong = row[i++], avgDouble = row[i++], avgDecimal = row[i++], sumDist = row[i++]; - StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, llow, lhigh, dlow, dhigh, - declow, dechigh, nulls, dist, avglen, maxlen, trues, falses, avgLong, avgDouble, - avgDecimal, sumDist, useDensityFunctionForNDVEstimation, ndvTuner); + llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, bitVector, avglen, maxlen, trues, falses); return cso; } @@ -1815,7 +1572,7 @@ private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i, private static final String STATS_COLLIST = "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", " + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", " - + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"AVG_COL_LEN\", " + + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"BIT_VECTOR\", \"AVG_COL_LEN\", " + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" "; private ColumnStatistics makeColumnStats( @@ -1826,7 +1583,7 @@ private ColumnStatistics makeColumnStats( for (Object[] row : list) { // LastAnalyzed is stored per column but thrift has it per several; // get the lowest for now as nobody actually uses this field. 
- Object laObj = row[offset + 14]; + Object laObj = row[offset + 15]; if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) { csd.setLastAnalyzed(extractSqlLong(laObj)); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index b52c94c9fb..5870054008 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -71,8 +71,8 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index 2dc2804343..d2aba95282 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -76,6 +76,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setLongStats( longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, + longStats.isSetBitVectors() ? longStats.getBitVectors() : null, longStats.isSetLowValue() ? 
longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { @@ -83,6 +84,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDoubleStats( doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, + doubleStats.isSetBitVectors() ? doubleStats.getBitVectors() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { @@ -92,12 +94,14 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDecimalStats( decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + decimalStats.isSetBitVectors() ? decimalStats.getBitVectors() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetBitVectors() ? stringStats.getBitVectors() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { @@ -111,6 +115,7 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl mColStats.setDateStats( dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, + dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? 
dateStats.getHighValue().getDaysSinceEpoch() : null); } @@ -146,6 +151,9 @@ public static void setFieldsIntoOldStats( if (mStatsObj.getNumDVs() != null) { oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); } + if (mStatsObj.getBitVector() != null) { + oldStatsObj.setBitVector(mStatsObj.getBitVector()); + } if (mStatsObj.getNumFalses() != null) { oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); } @@ -188,6 +196,9 @@ public static void setFieldsIntoOldStats( if (mStatsObj.getNumDVs() != null) { oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); } + if (mStatsObj.getBitVector() != null) { + oldStatsObj.setBitVector(mStatsObj.getBitVector()); + } if (mStatsObj.getNumFalses() != null) { oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); } @@ -220,6 +231,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( stringStats.setAvgColLen(mStatsObj.getAvgColLen()); stringStats.setMaxColLen(mStatsObj.getMaxColLen()); stringStats.setNumDVs(mStatsObj.getNumDVs()); + stringStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -241,6 +253,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( longStats.setLowValue(longLowValue); } longStats.setNumDVs(mStatsObj.getNumDVs()); + longStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); @@ -254,6 +267,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( doubleStats.setLowValue(doubleLowValue); } doubleStats.setNumDVs(mStatsObj.getNumDVs()); + doubleStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); @@ -267,6 +281,7 @@ public static ColumnStatisticsObj 
getTableColumnStatisticsObj( decimalStats.setLowValue(createThriftDecimal(decimalLowValue)); } decimalStats.setNumDVs(mStatsObj.getNumDVs()); + decimalStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setDecimalStats(decimalStats); } else if (colType.equals("date")) { DateColumnStatsData dateStats = new DateColumnStatsData(); @@ -280,6 +295,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( dateStats.setLowValue(new Date(lowValue)); } dateStats.setNumDVs(mStatsObj.getNumDVs()); + dateStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); } statsObj.setStatsData(colStatsData); @@ -323,6 +339,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setLongStats( longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, + longStats.isSetBitVectors() ? longStats.getBitVectors() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { @@ -330,6 +347,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDoubleStats( doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, + doubleStats.isSetBitVectors() ? doubleStats.getBitVectors() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { @@ -339,12 +357,14 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDecimalStats( decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + decimalStats.isSetBitVectors() ? 
decimalStats.getBitVectors() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetBitVectors() ? stringStats.getBitVectors() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { @@ -358,6 +378,7 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( mColStats.setDateStats( dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, + dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? 
dateStats.getHighValue().getDaysSinceEpoch() : null); } @@ -385,6 +406,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( stringStats.setAvgColLen(mStatsObj.getAvgColLen()); stringStats.setMaxColLen(mStatsObj.getMaxColLen()); stringStats.setNumDVs(mStatsObj.getNumDVs()); + stringStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -404,6 +426,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( longStats.setLowValue(mStatsObj.getLongLowValue()); } longStats.setNumDVs(mStatsObj.getNumDVs()); + longStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); @@ -415,6 +438,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); } doubleStats.setNumDVs(mStatsObj.getNumDVs()); + doubleStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); @@ -426,6 +450,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue())); } decimalStats.setNumDVs(mStatsObj.getNumDVs()); + decimalStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setDecimalStats(decimalStats); } else if (colType.equals("date")) { DateColumnStatsData dateStats = new DateColumnStatsData(); @@ -433,6 +458,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( dateStats.setHighValue(new Date(mStatsObj.getLongHighValue())); dateStats.setLowValue(new Date(mStatsObj.getLongLowValue())); dateStats.setNumDVs(mStatsObj.getNumDVs()); + 
dateStats.setBitVectors(mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); } statsObj.setStatsData(colStatsData); @@ -453,7 +479,7 @@ public static ColumnStatisticsDesc getPartitionColumnStatisticsDesc( // SQL public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, - Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException { + Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException { colType = colType.toLowerCase(); if (colType.equals("boolean")) { BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); @@ -468,6 +494,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen)); stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + stringStats.setBitVectors((String) bitVector); data.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); @@ -487,6 +514,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow)); } longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + longStats.setBitVectors((String) bitVector); data.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); @@ -498,6 +526,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow)); } doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + doubleStats.setBitVectors((String) bitVector); 
data.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); @@ -509,6 +538,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData decimalStats.setLowValue(createThriftDecimal((String)declow)); } decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + decimalStats.setBitVectors((String) bitVector); data.setDecimalStats(decimalStats); } else if (colType.equals("date")) { DateColumnStatsData dateStats = new DateColumnStatsData(); @@ -520,178 +550,11 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow))); } dateStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + dateStats.setBitVectors((String) bitVector); data.setDateStats(dateStats); } } - public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, - Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, - Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses, - Object avgLong, Object avgDouble, Object avgDecimal, Object sumDist, - boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException { - colType = colType.toLowerCase(); - if (colType.equals("boolean")) { - BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); - boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses)); - boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues)); - boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); - data.setBooleanStats(boolStats); - } else if (colType.equals("string") || colType.startsWith("varchar") - || colType.startsWith("char")) { - StringColumnStatsData stringStats = new StringColumnStatsData(); - stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); - stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen)); - 
stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); - stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); - data.setStringStats(stringStats); - } else if (colType.equals("binary")) { - BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); - binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); - binaryStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen)); - binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); - data.setBinaryStats(binaryStats); - } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") - || colType.equals("tinyint") || colType.equals("timestamp")) { - LongColumnStatsData longStats = new LongColumnStatsData(); - longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); - if (lhigh != null) { - longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh)); - } - if (llow != null) { - longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow)); - } - long lowerBound = MetaStoreDirectSql.extractSqlLong(dist); - long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist); - long rangeBound = Long.MAX_VALUE; - if (lhigh != null && llow != null) { - rangeBound = MetaStoreDirectSql.extractSqlLong(lhigh) - - MetaStoreDirectSql.extractSqlLong(llow) + 1; - } - long estimation; - if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null - && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) { - // We have estimation, lowerbound and higherbound. We use estimation if - // it is between lowerbound and higherbound. 
- estimation = MetaStoreDirectSql - .extractSqlLong((MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql - .extractSqlLong(llow)) / MetaStoreDirectSql.extractSqlDouble(avgLong)); - if (estimation < lowerBound) { - estimation = lowerBound; - } else if (estimation > higherBound) { - estimation = higherBound; - } - } else { - estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); - } - estimation = Math.min(estimation, rangeBound); - longStats.setNumDVs(estimation); - data.setLongStats(longStats); - } else if (colType.equals("date")) { - DateColumnStatsData dateStats = new DateColumnStatsData(); - dateStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); - if (lhigh != null) { - dateStats.setHighValue(new Date(MetaStoreDirectSql.extractSqlLong(lhigh))); - } - if (llow != null) { - dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow))); - } - long lowerBound = MetaStoreDirectSql.extractSqlLong(dist); - long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist); - long rangeBound = Long.MAX_VALUE; - if (lhigh != null && llow != null) { - rangeBound = MetaStoreDirectSql.extractSqlLong(lhigh) - - MetaStoreDirectSql.extractSqlLong(llow) + 1; - } - long estimation; - if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null - && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) { - // We have estimation, lowerbound and higherbound. We use estimation if - // it is between lowerbound and higherbound. 
- estimation = MetaStoreDirectSql - .extractSqlLong((MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql - .extractSqlLong(llow)) / MetaStoreDirectSql.extractSqlDouble(avgLong)); - if (estimation < lowerBound) { - estimation = lowerBound; - } else if (estimation > higherBound) { - estimation = higherBound; - } - } else { - estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); - } - estimation = Math.min(estimation, rangeBound); - dateStats.setNumDVs(estimation); - data.setDateStats(dateStats); - } else if (colType.equals("double") || colType.equals("float")) { - DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); - doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); - if (dhigh != null) { - doubleStats.setHighValue(MetaStoreDirectSql.extractSqlDouble(dhigh)); - } - if (dlow != null) { - doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow)); - } - long lowerBound = MetaStoreDirectSql.extractSqlLong(dist); - long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist); - if (useDensityFunctionForNDVEstimation && dhigh != null && dlow != null && avgDouble != null - && MetaStoreDirectSql.extractSqlDouble(avgDouble) != 0.0) { - long estimation = MetaStoreDirectSql - .extractSqlLong((MetaStoreDirectSql.extractSqlLong(dhigh) - MetaStoreDirectSql - .extractSqlLong(dlow)) / MetaStoreDirectSql.extractSqlDouble(avgDouble)); - if (estimation < lowerBound) { - doubleStats.setNumDVs(lowerBound); - } else if (estimation > higherBound) { - doubleStats.setNumDVs(higherBound); - } else { - doubleStats.setNumDVs(estimation); - } - } else { - doubleStats.setNumDVs((long) (lowerBound + (higherBound - lowerBound) * ndvTuner)); - } - data.setDoubleStats(doubleStats); - } else if (colType.startsWith("decimal")) { - DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); - decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); - Decimal low = null; - Decimal high = null; - BigDecimal blow = 
null; - BigDecimal bhigh = null; - if (dechigh instanceof BigDecimal) { - bhigh = (BigDecimal) dechigh; - high = new Decimal(ByteBuffer.wrap(bhigh.unscaledValue().toByteArray()), - (short) bhigh.scale()); - } else if (dechigh instanceof String) { - bhigh = new BigDecimal((String) dechigh); - high = createThriftDecimal((String) dechigh); - } - decimalStats.setHighValue(high); - if (declow instanceof BigDecimal) { - blow = (BigDecimal) declow; - low = new Decimal(ByteBuffer.wrap(blow.unscaledValue().toByteArray()), (short) blow.scale()); - } else if (dechigh instanceof String) { - blow = new BigDecimal((String) declow); - low = createThriftDecimal((String) declow); - } - decimalStats.setLowValue(low); - long lowerBound = MetaStoreDirectSql.extractSqlLong(dist); - long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist); - if (useDensityFunctionForNDVEstimation && dechigh != null && declow != null && avgDecimal != null - && MetaStoreDirectSql.extractSqlDouble(avgDecimal) != 0.0) { - long estimation = MetaStoreDirectSql.extractSqlLong(MetaStoreDirectSql.extractSqlLong(bhigh - .subtract(blow).floatValue() / MetaStoreDirectSql.extractSqlDouble(avgDecimal))); - if (estimation < lowerBound) { - decimalStats.setNumDVs(lowerBound); - } else if (estimation > higherBound) { - decimalStats.setNumDVs(higherBound); - } else { - decimalStats.setNumDVs(estimation); - } - } else { - decimalStats.setNumDVs((long) (lowerBound + (higherBound - lowerBound) * ndvTuner)); - } - data.setDecimalStats(decimalStats); - } - } - public static Decimal createThriftDecimal(String s) { BigDecimal d = new BigDecimal(s); return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale()); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 3ac4fe1604..34a32715ea 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ 
b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -80,8 +80,8 @@ import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.hadoop.hive.metastore.api.UnknownPartitionException; import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger; -import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java index d81d612e92..e6c836b183 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.List; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java index e796df2422..a34bc9f38b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.List; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java index 29a05390bf..f5ebc35fb3 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.List; @@ -28,6 +28,8 @@ public abstract class ColumnStatsAggregator { public boolean useDensityFunctionForNDVEstimation; + public double ndvTuner; + public abstract ColumnStatisticsObj aggregate(String colName, List partNames, List css) throws MetaException; } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java similarity index 89% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java index 568bf0609b..173e06fe8e 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java @@ -17,13 +17,14 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; @@ -34,7 +35,8 @@ private ColumnStatsAggregatorFactory() { } - public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boolean useDensityFunctionForNDVEstimation) { + public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, + boolean useDensityFunctionForNDVEstimation, double ndvTuner) { ColumnStatsAggregator agg; switch (type) { case BOOLEAN_STATS: @@ -43,6 +45,9 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boole case LONG_STATS: agg = new LongColumnStatsAggregator(); break; + case DATE_STATS: + agg = new DateColumnStatsAggregator(); + break; case DOUBLE_STATS: agg = new DoubleColumnStatsAggregator(); break; @@ -59,6 +64,7 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, boole throw new RuntimeException("Woh, bad. 
Unknown stats type " + type.toString()); } agg.useDensityFunctionForNDVEstimation = useDensityFunctionForNDVEstimation; + agg.ndvTuner = ndvTuner; return agg; } @@ -76,6 +82,10 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col csd.setLongStats(new LongColumnStatsData()); break; + case DATE_STATS: + csd.setDateStats(new DateColumnStatsData()); + break; + case DOUBLE_STATS: csd.setDoubleStats(new DoubleColumnStatsData()); break; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java new file mode 100644 index 0000000000..e4d973289a --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -0,0 +1,356 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.aggr; + +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Date; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DateColumnStatsAggregator extends ColumnStatsAggregator implements + IExtrapolatePartStatus { + + private static final Logger LOG = LoggerFactory.getLogger(DateColumnStatsAggregator.class); + + @Override + public ColumnStatisticsObj aggregate(String colName, List partNames, + List css) throws MetaException { + ColumnStatisticsObj statsObj = null; + + // check if all the ColumnStatisticsObjs contain stats and all the ndv are + // bitvectors + boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.info("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); + NumDistinctValueEstimator ndvEstimator = null; + String colType = null; + for (ColumnStatistics cs : css) { + if (cs.getStatsObjSize() != 1) { + throw new MetaException( + "The number of columns should be exactly one in aggrStats, but found " + + cs.getStatsObjSize()); + } + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + if (statsObj == null) { + colType = cso.getColType(); + statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso + .getStatsData().getSetField()); + } + if 
(!cso.getStatsData().getDateStats().isSetBitVectors() + || cso.getStatsData().getDateStats().getBitVectors().length() == 0) { + ndvEstimator = null; + break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(cso.getStatsData().getDateStats().getBitVectors()); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } + } + } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); + } + LOG.info("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); + ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); + if (doAllPartitionContainStats || css.size() < 2) { + DateColumnStatsData aggregateData = null; + long lowerBound = 0; + long higherBound = 0; + double densityAvgSum = 0.0; + for (ColumnStatistics cs : css) { + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + DateColumnStatsData newData = cso.getStatsData().getDateStats(); + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += ((double) diff(newData.getHighValue(), newData.getLowValue())) + / newData.getNumDVs(); // floating-point division: no truncation, no ArithmeticException when numDVs is 0 + if (ndvEstimator != null) { + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); + } + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData + .setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } + } + if
(ndvEstimator != null) { + // if all the ColumnStatisticsObjs contain bitvectors, we do not need to + // use uniform distribution assumption because we can merge bitvectors + // to get a good estimation. + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } else { + long estimation; + if (useDensityFunctionForNDVEstimation) { + // We have estimation, lowerbound and higherbound. We use estimation + // if it is between lowerbound and higherbound. + double densityAvg = densityAvgSum / partNames.size(); + estimation = (long) (diff(aggregateData.getHighValue(), aggregateData.getLowValue()) / densityAvg); + if (estimation < lowerBound) { + estimation = lowerBound; + } else if (estimation > higherBound) { + estimation = higherBound; + } + } else { + estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); + } + aggregateData.setNumDVs(estimation); + } + columnStatisticsData.setDateStats(aggregateData); + } else { + // we need extrapolation + LOG.info("start extrapolation for " + colName); + + Map indexMap = new HashMap(); + for (int index = 0; index < partNames.size(); index++) { + indexMap.put(partNames.get(index), index); + } + Map adjustedIndexMap = new HashMap(); + Map adjustedStatsMap = new HashMap(); + // while we scan the css, we also get the densityAvg, lowerbound and + // higherbound when useDensityFunctionForNDVEstimation is true. + double densityAvgSum = 0.0; + if (ndvEstimator == null) { + // if not every partition uses bitvector for ndv, we just fall back to + // the traditional extrapolation methods.
+ for (ColumnStatistics cs : css) { + String partName = cs.getStatsDesc().getPartName(); + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + DateColumnStatsData newData = cso.getStatsData().getDateStats(); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += ((double) diff(newData.getHighValue(), newData.getLowValue())) / newData.getNumDVs(); // double division, see density computation above + } + adjustedIndexMap.put(partName, (double) indexMap.get(partName)); + adjustedStatsMap.put(partName, cso.getStatsData()); + } + } else { + // we first merge all the adjacent bitvectors that we could merge and + // derive new partition names and index. + StringBuilder pseudoPartName = new StringBuilder(); + double pseudoIndexSum = 0; + int length = 0; + int curIndex = -1; + DateColumnStatsData aggregateData = null; + for (ColumnStatistics cs : css) { + String partName = cs.getStatsDesc().getPartName(); + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + DateColumnStatsData newData = cso.getStatsData().getDateStats(); + // newData.isSetBitVectors() should be true for sure because we + // already checked it before. + if (indexMap.get(partName) != curIndex) { + // There is bitvector, but it is not adjacent to the previous ones.
+ if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setDateStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += ((double) diff(aggregateData.getHighValue(), aggregateData.getLowValue())) + / aggregateData.getNumDVs(); + } + // reset everything + pseudoPartName = new StringBuilder(); + pseudoIndexSum = 0; + length = 0; + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); + } + aggregateData = null; + } + curIndex = indexMap.get(partName); + pseudoPartName.append(partName); + pseudoIndexSum += curIndex; + length++; + curIndex++; + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + } + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors())); + } + if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setDateStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += ((double) diff(aggregateData.getHighValue(), aggregateData.getLowValue())) + / aggregateData.getNumDVs(); + } + } + } + extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap, + adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); + } +
statsObj.setStatsData(columnStatisticsData); + return statsObj; + } + + private long diff(Date d1, Date d2) { + return d1.getDaysSinceEpoch() - d2.getDaysSinceEpoch(); + } + + private Date min(Date d1, Date d2) { + return d1.compareTo(d2) < 0 ? d1 : d2; + } + + private Date max(Date d1, Date d2) { + return d1.compareTo(d2) < 0 ? d2 : d1; + } + + @Override + public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, + int numPartsWithStats, Map adjustedIndexMap, + Map adjustedStatsMap, double densityAvg) { + int rightBorderInd = numParts; + DateColumnStatsData extrapolateDateData = new DateColumnStatsData(); + Map extractedAdjustedStatsMap = new HashMap<>(); + for (Map.Entry entry : adjustedStatsMap.entrySet()) { + extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDateStats()); + } + List> list = new LinkedList>( + extractedAdjustedStatsMap.entrySet()); + // get the lowValue (Long.compare returns 0 on ties, as the Comparator contract requires) + Collections.sort(list, new Comparator>() { + public int compare(Map.Entry o1, + Map.Entry o2) { + return Long.compare(o1.getValue().getLowValue().getDaysSinceEpoch(), o2.getValue().getLowValue().getDaysSinceEpoch()); + } + }); + double minInd = adjustedIndexMap.get(list.get(0).getKey()); + double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + long lowValue = 0; + long min = list.get(0).getValue().getLowValue().getDaysSinceEpoch(); + long max = list.get(list.size() - 1).getValue().getLowValue().getDaysSinceEpoch(); + if (minInd == maxInd) { + lowValue = min; + } else if (minInd < maxInd) { + // left border is the min + lowValue = (long) (max - (max - min) * maxInd / (maxInd - minInd)); + } else { + // right border is the min + lowValue = (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); + } + + // get the highValue (tie-safe ordering via Long.compare) + Collections.sort(list, new Comparator>() { + public int compare(Map.Entry o1, + Map.Entry o2) { + return Long.compare(o1.getValue().getHighValue().getDaysSinceEpoch(), o2.getValue().getHighValue().getDaysSinceEpoch());
+ } + }); + minInd = adjustedIndexMap.get(list.get(0).getKey()); + maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + long highValue = 0; + min = list.get(0).getValue().getHighValue().getDaysSinceEpoch(); + max = list.get(list.size() - 1).getValue().getHighValue().getDaysSinceEpoch(); + if (minInd == maxInd) { + highValue = min; + } else if (minInd < maxInd) { + // right border is the max + highValue = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); + } else { + // left border is the max + highValue = (long) (min + (max - min) * minInd / (minInd - maxInd)); + } + + // get the #nulls + long numNulls = 0; + for (Map.Entry entry : extractedAdjustedStatsMap.entrySet()) { + numNulls += entry.getValue().getNumNulls(); + } + // we scale up sumNulls based on the number of partitions + numNulls = numNulls * numParts / numPartsWithStats; + + // get the ndv (tie-safe ordering via Long.compare) + long ndv = 0; + Collections.sort(list, new Comparator>() { + public int compare(Map.Entry o1, + Map.Entry o2) { + return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
+ } + }); + long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); + long higherBound = 0; + for (Map.Entry entry : list) { + higherBound += entry.getValue().getNumDVs(); + } + if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { + ndv = (long) ((highValue - lowValue) / densityAvg); + if (ndv < lowerBound) { + ndv = lowerBound; + } else if (ndv > higherBound) { + ndv = higherBound; + } + } else { + minInd = adjustedIndexMap.get(list.get(0).getKey()); + maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + min = list.get(0).getValue().getNumDVs(); + max = list.get(list.size() - 1).getValue().getNumDVs(); + if (minInd == maxInd) { + ndv = min; + } else if (minInd < maxInd) { + // right border is the max + ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); + } else { + // left border is the max + ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); + } + } + extrapolateDateData.setLowValue(new Date(lowValue)); + extrapolateDateData.setHighValue(new Date(highValue)); + extrapolateDateData.setNumNulls(numNulls); + extrapolateDateData.setNumDVs(ndv); + extrapolateData.setDateStats(extrapolateDateData); + } +} diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java index 8eb64e0143..b230ba3f94 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License.
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Collections; import java.util.Comparator; @@ -35,9 +35,13 @@ import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.hbase.HBaseUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implements IExtrapolatePartStatus { + + private static final Logger LOG = LoggerFactory.getLogger(DecimalColumnStatsAggregator.class); @Override public ColumnStatisticsObj aggregate(String colName, List partNames, @@ -47,6 +51,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.info("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { @@ -85,6 +90,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, ndvEstimator = NumDistinctValueEstimatorFactory .getEmptyNumDistinctValueEstimator(ndvEstimator); } + LOG.info("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { DecimalColumnStatsData aggregateData = null; @@ -94,12 +100,10 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats(); - if (useDensityFunctionForNDVEstimation) { - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += 
newData.getNumDVs(); - densityAvgSum += (HBaseUtils.getDoubleValue(newData.getHighValue()) - HBaseUtils - .getDoubleValue(newData.getLowValue())) / newData.getNumDVs(); - } + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += (HBaseUtils.getDoubleValue(newData.getHighValue()) - HBaseUtils + .getDoubleValue(newData.getLowValue())) / newData.getNumDVs(); if (ndvEstimator != null) { ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory .getNumDistinctValueEstimator(newData.getBitVectors())); @@ -129,28 +133,27 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // to get a good estimation. aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); } else { + long estimation; if (useDensityFunctionForNDVEstimation) { // We have estimation, lowerbound and higherbound. We use estimation // if it is between lowerbound and higherbound. double densityAvg = densityAvgSum / partNames.size(); - long estimation = (long) ((HBaseUtils.getDoubleValue(aggregateData.getHighValue()) - HBaseUtils + estimation = (long) ((HBaseUtils.getDoubleValue(aggregateData.getHighValue()) - HBaseUtils .getDoubleValue(aggregateData.getLowValue())) / densityAvg); if (estimation < lowerBound) { - aggregateData.setNumDVs(lowerBound); + estimation = lowerBound; } else if (estimation > higherBound) { - aggregateData.setNumDVs(higherBound); - } else { - aggregateData.setNumDVs(estimation); + estimation = higherBound; } } else { - // Without useDensityFunctionForNDVEstimation, we just use the - // default one, which is the max of all the partitions and it is - // already done. 
+ estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); } + aggregateData.setNumDVs(estimation); } columnStatisticsData.setDecimalStats(aggregateData); } else { // we need extrapolation + LOG.info("start extrapolation for " + colName); Map indexMap = new HashMap(); for (int index = 0; index < partNames.size(); index++) { indexMap.put(partNames.get(index), index); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java index b6b86123b2..7d9db53332 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Collections; import java.util.Comparator; @@ -33,10 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implements IExtrapolatePartStatus { + private static final Logger LOG = LoggerFactory.getLogger(DoubleColumnStatsAggregator.class); + @Override public ColumnStatisticsObj aggregate(String colName, List partNames, List css) throws MetaException { @@ -45,6 +49,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.info("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); NumDistinctValueEstimator ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { @@ -83,6 +88,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, ndvEstimator = NumDistinctValueEstimatorFactory .getEmptyNumDistinctValueEstimator(ndvEstimator); } + LOG.info("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { DoubleColumnStatsData aggregateData = null; @@ -92,11 +98,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); - if (useDensityFunctionForNDVEstimation) { - lowerBound = Math.max(lowerBound,
newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); if (ndvEstimator != null) { ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory .getNumDistinctValueEstimator(newData.getBitVectors())); @@ -117,27 +121,26 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // to get a good estimation. aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); } else { + long estimation; if (useDensityFunctionForNDVEstimation) { // We have estimation, lowerbound and higherbound. We use estimation // if it is between lowerbound and higherbound. double densityAvg = densityAvgSum / partNames.size(); - long estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); + estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); if (estimation < lowerBound) { - aggregateData.setNumDVs(lowerBound); + estimation = lowerBound; } else if (estimation > higherBound) { - aggregateData.setNumDVs(higherBound); - } else { - aggregateData.setNumDVs(estimation); + estimation = higherBound; } } else { - // Without useDensityFunctionForNDVEstimation, we just use the - // default one, which is the max of all the partitions and it is - // already done. 
+ estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); } + aggregateData.setNumDVs(estimation); } columnStatisticsData.setDoubleStats(aggregateData); } else { // we need extrapolation + LOG.info("start extrapolation for " + colName); Map indexMap = new HashMap(); for (int index = 0; index < partNames.size(); index++) { indexMap.put(partNames.get(index), index); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java similarity index 96% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java index af75bced72..acf679e1c3 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/IExtrapolatePartStatus.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Map; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java similarity index 93% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java index 2da6f60167..f28a4f2b3a 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats; +package org.apache.hadoop.hive.metastore.columnstats.aggr; import java.util.Collections; import java.util.Comparator; @@ -33,10 +33,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class LongColumnStatsAggregator extends ColumnStatsAggregator implements IExtrapolatePartStatus { + private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class); + @Override public ColumnStatisticsObj aggregate(String colName, List partNames, List css) throws MetaException { @@ -45,6 +49,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // check if all the ColumnStatisticsObjs contain stats and all the ndv are // bitvectors boolean doAllPartitionContainStats = partNames.size() == css.size(); + LOG.info("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); NumDistinctValueEstimator 
ndvEstimator = null; String colType = null; for (ColumnStatistics cs : css) { @@ -83,6 +88,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, ndvEstimator = NumDistinctValueEstimatorFactory .getEmptyNumDistinctValueEstimator(ndvEstimator); } + LOG.info("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || css.size() < 2) { LongColumnStatsData aggregateData = null; @@ -92,11 +98,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); LongColumnStatsData newData = cso.getStatsData().getLongStats(); - if (useDensityFunctionForNDVEstimation) { - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); if (ndvEstimator != null) { ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory .getNumDistinctValueEstimator(newData.getBitVectors())); @@ -117,27 +121,27 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, // to get a good estimation. aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); } else { + long estimation; if (useDensityFunctionForNDVEstimation) { // We have estimation, lowerbound and higherbound. We use estimation // if it is between lowerbound and higherbound. 
double densityAvg = densityAvgSum / partNames.size(); - long estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); + estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); if (estimation < lowerBound) { - aggregateData.setNumDVs(lowerBound); + estimation = lowerBound; } else if (estimation > higherBound) { - aggregateData.setNumDVs(higherBound); - } else { - aggregateData.setNumDVs(estimation); + estimation = higherBound; } } else { - // Without useDensityFunctionForNDVEstimation, we just use the - // default one, which is the max of all the partitions and it is - // already done. + estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); } + aggregateData.setNumDVs(estimation); } columnStatisticsData.setLongStats(aggregateData); } else { // we need extrapolation + LOG.info("start extrapolation for " + colName); + Map indexMap = new HashMap(); for (int index = 0; index < partNames.size(); index++) { indexMap.put(partNames.get(index), index); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java new file mode 100644 index 0000000000..cf5a895881 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java @@ -0,0 +1,301 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Aggregates the per-partition statistics of a string column into a single
+ * {@link ColumnStatisticsObj}. When every requested partition has statistics
+ * (or fewer than two statistics objects are supplied) the values are combined
+ * directly; otherwise the available values are extrapolated over the full
+ * number of partitions.
+ */
+public class StringColumnStatsAggregator extends ColumnStatsAggregator implements
+    IExtrapolatePartStatus {
+
+  // Use this class (not LongColumnStatsAggregator) so log lines are attributed correctly.
+  private static final Logger LOG = LoggerFactory.getLogger(StringColumnStatsAggregator.class);
+
+  /**
+   * Aggregates the statistics of one string column over a set of partitions.
+   *
+   * @param colName name of the column being aggregated
+   * @param partNames names of the partitions the aggregate is computed for; its
+   *          size is compared with {@code css.size()} to decide whether
+   *          extrapolation is needed
+   * @param css the available per-partition statistics; every entry must hold
+   *          exactly one statistics object
+   * @return the aggregated statistics object for the column
+   * @throws MetaException if an entry of {@code css} does not hold exactly one
+   *           statistics object
+   */
+  @Override
+  public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
+      List<ColumnStatistics> css) throws MetaException {
+    ColumnStatisticsObj statsObj = null;
+
+    // check if all the ColumnStatisticsObjs contain stats and all the ndv are
+    // bitvectors. Only when both of the conditions are true, we merge bit
+    // vectors. Otherwise, just use the maximum function.
+    boolean doAllPartitionContainStats = partNames.size() == css.size();
+    LOG.info("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats);
+    NumDistinctValueEstimator ndvEstimator = null;
+    String colType = null;
+    for (ColumnStatistics cs : css) {
+      if (cs.getStatsObjSize() != 1) {
+        throw new MetaException(
+            "The number of columns should be exactly one in aggrStats, but found "
+                + cs.getStatsObjSize());
+      }
+      ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+      if (statsObj == null) {
+        colType = cso.getColType();
+        statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
+            .getStatsData().getSetField());
+      }
+      if (!cso.getStatsData().getStringStats().isSetBitVectors()
+          || cso.getStatsData().getStringStats().getBitVectors().length() == 0) {
+        // at least one partition has no bit vector: merging is impossible
+        ndvEstimator = null;
+        break;
+      }
+      // check if all of the bit vectors can merge
+      NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
+          .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors());
+      if (ndvEstimator == null) {
+        ndvEstimator = estimator;
+      } else if (!ndvEstimator.canMerge(estimator)) {
+        ndvEstimator = null;
+        break;
+      }
+    }
+    if (ndvEstimator != null) {
+      // start from an empty estimator of the same kind; the real merge happens below
+      ndvEstimator = NumDistinctValueEstimatorFactory
+          .getEmptyNumDistinctValueEstimator(ndvEstimator);
+    }
+    LOG.info("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
+    ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
+    if (doAllPartitionContainStats || css.size() < 2) {
+      StringColumnStatsData aggregateData = null;
+      for (ColumnStatistics cs : css) {
+        ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+        StringColumnStatsData newData = cso.getStatsData().getStringStats();
+        if (ndvEstimator != null) {
+          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+              .getNumDistinctValueEstimator(newData.getBitVectors()));
+        }
+        if (aggregateData == null) {
+          aggregateData = newData.deepCopy();
+        } else {
+          aggregateData
+              .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
+          aggregateData
+              .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
+          aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+          aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+        }
+      }
+      if (ndvEstimator != null) {
+        // if all the ColumnStatisticsObjs contain bitvectors, we do not need to
+        // use uniform distribution assumption because we can merge bitvectors
+        // to get a good estimation.
+        aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+      }
+      // otherwise aggregateData already holds the max ndv of all partitions
+      columnStatisticsData.setStringStats(aggregateData);
+    } else {
+      // we need extrapolation
+      LOG.info("start extrapolation for " + colName);
+
+      Map<String, Integer> indexMap = new HashMap<>();
+      for (int index = 0; index < partNames.size(); index++) {
+        indexMap.put(partNames.get(index), index);
+      }
+      Map<String, Double> adjustedIndexMap = new HashMap<>();
+      Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<>();
+      if (ndvEstimator == null) {
+        // if not every partition uses bitvector for ndv, we just fall back to
+        // the traditional extrapolation methods.
+        for (ColumnStatistics cs : css) {
+          String partName = cs.getStatsDesc().getPartName();
+          ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+          StringColumnStatsData newData = cso.getStatsData().getStringStats();
+          adjustedIndexMap.put(partName, (double) indexMap.get(partName));
+          adjustedStatsMap.put(partName, cso.getStatsData());
+        }
+      } else {
+        // we first merge all the adjacent bitvectors that we could merge and
+        // derive new partition names and index.
+        StringBuilder pseudoPartName = new StringBuilder();
+        double pseudoIndexSum = 0;
+        int length = 0;
+        int curIndex = -1;
+        StringColumnStatsData aggregateData = null;
+        for (ColumnStatistics cs : css) {
+          String partName = cs.getStatsDesc().getPartName();
+          ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+          StringColumnStatsData newData = cso.getStatsData().getStringStats();
+          // newData.isSetBitVectors() should be true for sure because we
+          // already checked it before.
+          if (indexMap.get(partName) != curIndex) {
+            // There is bitvector, but it is not adjacent to the previous ones.
+            if (length > 0) {
+              // close the current run of adjacent partitions: we have to set ndv
+              adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+              aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+              ColumnStatisticsData csd = new ColumnStatisticsData();
+              csd.setStringStats(aggregateData);
+              adjustedStatsMap.put(pseudoPartName.toString(), csd);
+              // reset everything
+              pseudoPartName = new StringBuilder();
+              pseudoIndexSum = 0;
+              length = 0;
+              ndvEstimator = NumDistinctValueEstimatorFactory
+                  .getEmptyNumDistinctValueEstimator(ndvEstimator);
+            }
+            aggregateData = null;
+          }
+          curIndex = indexMap.get(partName);
+          pseudoPartName.append(partName);
+          pseudoIndexSum += curIndex;
+          length++;
+          // the next adjacent partition is expected at curIndex + 1
+          curIndex++;
+          if (aggregateData == null) {
+            aggregateData = newData.deepCopy();
+          } else {
+            // NOTE(review): the all-partitions path above combines avgColLen with
+            // Math.max; Math.min here looks inconsistent - confirm the intent.
+            aggregateData.setAvgColLen(Math.min(aggregateData.getAvgColLen(),
+                newData.getAvgColLen()));
+            aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(),
+                newData.getMaxColLen()));
+            aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+          }
+          ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory
+              .getNumDistinctValueEstimator(newData.getBitVectors()));
+        }
+        if (length > 0) {
+          // close the last run: we have to set ndv
+          adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length);
+          aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues());
+          ColumnStatisticsData csd = new ColumnStatisticsData();
+          csd.setStringStats(aggregateData);
+          adjustedStatsMap.put(pseudoPartName.toString(), csd);
+        }
+      }
+      extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
+          adjustedStatsMap, -1);
+    }
+    statsObj.setStatsData(columnStatisticsData);
+    return statsObj;
+  }
+
+  /**
+   * Extrapolates string column statistics over all partitions from the
+   * partitions (or merged partition groups) that have statistics, and stores
+   * the result in {@code extrapolateData}.
+   *
+   * @param extrapolateData receives the extrapolated string statistics
+   * @param numParts total number of partitions; also used as the right border index
+   * @param numPartsWithStats number of partitions that have statistics, used to
+   *          scale the number of nulls
+   * @param adjustedIndexMap index of each (pseudo) partition name
+   * @param adjustedStatsMap statistics of each (pseudo) partition name
+   * @param densityAvg not used by this implementation
+   */
+  @Override
+  public void extrapolate(ColumnStatisticsData extrapolateData, int numParts,
+      int numPartsWithStats, Map<String, Double> adjustedIndexMap,
+      Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
+    int rightBorderInd = numParts;
+    StringColumnStatsData extrapolateStringData = new StringColumnStatsData();
+    Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
+    for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
+      extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats());
+    }
+    List<Map.Entry<String, StringColumnStatsData>> list =
+        new LinkedList<Map.Entry<String, StringColumnStatsData>>(
+            extractedAdjustedStatsMap.entrySet());
+
+    // get the avgLen
+    // The comparators below return 0 for ties (Double.compare / Long.compare);
+    // a comparator that never returns 0 violates the Comparator contract and
+    // may make Collections.sort throw IllegalArgumentException.
+    Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, StringColumnStatsData> o1,
+          Map.Entry<String, StringColumnStatsData> o2) {
+        return Double.compare(o1.getValue().getAvgColLen(), o2.getValue().getAvgColLen());
+      }
+    });
+    double minInd = adjustedIndexMap.get(list.get(0).getKey());
+    double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+    double min = list.get(0).getValue().getAvgColLen();
+    double max = list.get(list.size() - 1).getValue().getAvgColLen();
+    double avgColLen = extrapolateToBorder(min, max, minInd, maxInd, rightBorderInd);
+
+    // get the maxLen
+    Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, StringColumnStatsData> o1,
+          Map.Entry<String, StringColumnStatsData> o2) {
+        return Long.compare(o1.getValue().getMaxColLen(), o2.getValue().getMaxColLen());
+      }
+    });
+    minInd = adjustedIndexMap.get(list.get(0).getKey());
+    maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+    // read the max column length here (the list is sorted by it), not the average
+    min = list.get(0).getValue().getMaxColLen();
+    max = list.get(list.size() - 1).getValue().getMaxColLen();
+    double maxColLen = extrapolateToBorder(min, max, minInd, maxInd, rightBorderInd);
+
+    // get the #nulls
+    long numNulls = 0;
+    for (Map.Entry<String, StringColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) {
+      numNulls += entry.getValue().getNumNulls();
+    }
+    // we scale up sumNulls based on the number of partitions
+    numNulls = numNulls * numParts / numPartsWithStats;
+
+    // get the ndv
+    Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, StringColumnStatsData> o1,
+          Map.Entry<String, StringColumnStatsData> o2) {
+        return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
+      }
+    });
+    minInd = adjustedIndexMap.get(list.get(0).getKey());
+    maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+    min = list.get(0).getValue().getNumDVs();
+    max = list.get(list.size() - 1).getValue().getNumDVs();
+    long ndv = (long) extrapolateToBorder(min, max, minInd, maxInd, rightBorderInd);
+
+    extrapolateStringData.setAvgColLen(avgColLen);
+    extrapolateStringData.setMaxColLen((long) maxColLen);
+    extrapolateStringData.setNumNulls(numNulls);
+    extrapolateStringData.setNumDVs(ndv);
+    extrapolateData.setStringStats(extrapolateStringData);
+  }
+
+  /**
+   * Linearly extends the line through (minInd, min) and (maxInd, max) to the
+   * border that lies on the side of the larger value.
+   *
+   * @param min smallest observed value
+   * @param max largest observed value
+   * @param minInd index at which {@code min} was observed
+   * @param maxInd index at which {@code max} was observed
+   * @param rightBorderInd index of the right border
+   * @return {@code min} when both indexes coincide, otherwise the extrapolated value
+   */
+  private static double extrapolateToBorder(double min, double max, double minInd,
+      double maxInd, int rightBorderInd) {
+    if (minInd == maxInd) {
+      return min;
+    } else if (minInd < maxInd) {
+      // right border is the max
+      return min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd);
+    } else {
+      // left border is the max
+      return min + (max - min) * minInd / (minInd - maxInd);
+    }
+  }
+
+}
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
similarity index 96%
rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
index af0669eb65..4c2d1bc602 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java
@@ -17,7 +17,7 @@
  * under the License.
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java similarity index 96% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java index 33ff6a19f5..8e5015323f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java similarity index 95% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java index d3051a2b00..474d4ddcd1 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.slf4j.Logger; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java similarity index 98% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java index c013ba5c5d..0ce1847d1c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java similarity index 98% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java index e899bfe85f..2542a00d36 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java similarity index 98% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java index 4099ffcace..4e8e129758 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java index 1691fc97df..4ef5c39d1c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java index 361af350fe..acf7f03c72 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java similarity index 97% rename from metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java rename to metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java index 8e28f907ee..b3cd33c671 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java @@ -17,7 +17,7 @@ * under the 
License. */ -package org.apache.hadoop.hive.metastore.hbase.stats.merge; +package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java index 0e119896a5..78a962a0e6 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java @@ -32,8 +32,8 @@ import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregator; -import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregatorFactory; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; import java.io.IOException; import java.security.MessageDigest; @@ -84,7 +84,10 @@ private StatsCache(final Configuration conf) { .build(new CacheLoader() { @Override public AggrStats load(StatsCacheKey key) throws Exception { - boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); + boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(conf, + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); + double ndvTuner = HiveConf.getFloatVar(conf, + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER); HBaseReadWrite hrw = HBaseReadWrite.getInstance(); AggrStats aggrStats = hrw.getAggregatedStats(key.hashed); if (aggrStats == null) { @@ -100,7 +103,7 @@ public AggrStats load(StatsCacheKey key) throws Exception { if (aggregator == 
null) { aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css.iterator() .next().getStatsObj().iterator().next().getStatsData().getSetField(), - useDensityFunctionForNDVEstimation); + useDensityFunctionForNDVEstimation, ndvTuner); } ColumnStatisticsObj statsObj = aggregator .aggregate(key.colName, key.partNames, css); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java deleted file mode 100644 index 83c6c54fd2..0000000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java +++ /dev/null @@ -1,122 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.stats; - -import java.util.List; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; - -public class StringColumnStatsAggregator extends ColumnStatsAggregator { - - @Override - public ColumnStatisticsObj aggregate(String colName, List partNames, - List css) throws MetaException { - ColumnStatisticsObj statsObj = null; - - // check if all the ColumnStatisticsObjs contain stats and all the ndv are - // bitvectors. Only when both of the conditions are true, we merge bit - // vectors. Otherwise, just use the maximum function. 
- boolean doAllPartitionContainStats = partNames.size() == css.size(); - NumDistinctValueEstimator ndvEstimator = null; - String colType = null; - for (ColumnStatistics cs : css) { - if (cs.getStatsObjSize() != 1) { - throw new MetaException( - "The number of columns should be exactly one in aggrStats, but found " - + cs.getStatsObjSize()); - } - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - if (statsObj == null) { - colType = cso.getColType(); - statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso - .getStatsData().getSetField()); - } - if (!cso.getStatsData().getStringStats().isSetBitVectors() - || cso.getStatsData().getStringStats().getBitVectors().length() == 0) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(cso.getStatsData().getStringStats().getBitVectors()); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); - } - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats && ndvEstimator!=null) { - StringColumnStatsData aggregateData = null; - for (ColumnStatistics cs : css) { - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsData newData = cso.getStatsData().getStringStats(); - ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(newData.getBitVectors())); - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData - .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData - 
.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - } - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - columnStatisticsData.setStringStats(aggregateData); - } else { - StringColumnStatsData aggregateData = null; - for (ColumnStatistics cs : css) { - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsData newData = cso.getStatsData().getStringStats(); - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData - .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData - .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - columnStatisticsData.setStringStats(aggregateData); - } - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - -} diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index 2967a60fae..27fbdd30ba 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -48,6 +48,7 @@ private String decimalHighValue; private Long numNulls; private Long numDVs; + private String bitVector; private Double avgColLen; private Long maxColLen; private Long numTrues; @@ -166,31 +167,35 @@ public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numNulls = numNulls; } - public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, String 
bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, String bitVector, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.doubleLowValue = lowValue; this.doubleHighValue = highValue; } public void setDecimalStats( - Long numNulls, Long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, String bitVector, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) { + public void setStringStats(Long numNulls, Long numNDVs, String bitVector, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } @@ -201,9 +206,10 @@ public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.avgColLen = avgColLen; } - public void setDateStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setDateStats(Long numNulls, Long numNDVs, String bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } @@ -255,4 +261,12 @@ public String getDecimalHighValue() { public void setDecimalHighValue(String decimalHighValue) { this.decimalHighValue = decimalHighValue; } + + public String getBitVector() { + return bitVector; + } + + public void setBitVector(String bitVector) { + this.bitVector = bitVector; + } } diff 
--git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 132f7a137b..755087618b 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -46,6 +46,7 @@ private String decimalHighValue; private Long numNulls; private Long numDVs; + private String bitVector; private Double avgColLen; private Long maxColLen; private Long numTrues; @@ -156,31 +157,35 @@ public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numNulls = numNulls; } - public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, String bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, String bitVector, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.doubleLowValue = lowValue; this.doubleHighValue = highValue; } public void setDecimalStats( - Long numNulls, Long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, String bitVector, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) { + public void setStringStats(Long numNulls, Long numNDVs, String bitVector, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = 
numNDVs; + this.bitVector = bitVector; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } @@ -191,9 +196,10 @@ public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.avgColLen = avgColLen; } - public void setDateStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { + public void setDateStats(Long numNulls, Long numNDVs, String bitVector, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; + this.bitVector = bitVector; this.longLowValue = lowValue; this.longHighValue = highValue; } @@ -246,4 +252,12 @@ public String getDecimalHighValue() { public void setDecimalHighValue(String decimalHighValue) { this.decimalHighValue = decimalHighValue; } + + public String getBitVector() { + return bitVector; + } + + public void setBitVector(String bitVector) { + this.bitVector = bitVector; + } } diff --git a/metastore/src/model/package.jdo b/metastore/src/model/package.jdo index 9c4bc219f2..3d759c7764 100644 --- a/metastore/src/model/package.jdo +++ b/metastore/src/model/package.jdo @@ -879,6 +879,9 @@ + + + @@ -943,6 +946,9 @@ + + + diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java index ecc99c3300..9cf1fb8986 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -115,7 +116,11 @@ public void allPartitions() throws Exception { 
dcsd.setLowValue(-20.1234213423); dcsd.setNumNulls(30); dcsd.setNumDVs(12342); - dcsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}"); + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addDouble(1); + hll.addDouble(2); + hll.addDouble(3); + dcsd.setBitVectors(hll.serialize()); data.setDoubleStats(dcsd); obj.setStatsData(data); cs.addToStatsObj(obj); @@ -135,7 +140,11 @@ public void allPartitions() throws Exception { dcsd.setLowValue(-20.1234213423); dcsd.setNumNulls(30); dcsd.setNumDVs(12342); - dcsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}"); + hll = HyperLogLog.builder().build(); + hll.addDouble(3); + hll.addDouble(4); + hll.addDouble(5); + dcsd.setBitVectors(hll.serialize()); data.setDoubleStats(dcsd); obj.setStatsData(data); cs.addToStatsObj(obj); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java index 99ce96ca0d..4d868b0146 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.api.AggrStats; @@ -62,8 +63,7 @@ SortedMap rows = new TreeMap<>(); // NDV will be 3 for the bitVectors - String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}"; - + String bitVectors = 
null; @Before public void before() throws IOException { MockitoAnnotations.initMocks(this); @@ -71,6 +71,11 @@ public void before() throws IOException { conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true); store = MockUtils.init(conf, htable, rows); store.backdoor().getStatsCache().resetCounters(); + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addLong(1); + hll.addLong(2); + hll.addLong(3); + bitVectors = hll.serialize(); } private static interface Checker { @@ -395,7 +400,7 @@ public void noPartitionsHaveBitVectorStatus() throws Exception { dcsd.setHighValue(1000 + i); dcsd.setLowValue(-1000 - i); dcsd.setNumNulls(i); - dcsd.setNumDVs(10 * i); + dcsd.setNumDVs(i == 0 ? 1 : 10 * i); data.setLongStats(dcsd); obj.setStatsData(data); cs.addToStatsObj(obj); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java index 74e16695a9..0ad27806d1 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.api.AggrStats; @@ -61,9 +62,8 @@ SortedMap rows = new TreeMap<>(); // NDV will be 3 for bitVectors[0] and 1 for bitVectors[1] - String bitVectors[] = { - "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}", - "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" }; + String bitVectors[] = new String[2]; + @Before public void 
before() throws IOException { @@ -73,6 +73,15 @@ public void before() throws IOException { conf.setBoolean(HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION.varname, true); store = MockUtils.init(conf, htable, rows); store.backdoor().getStatsCache().resetCounters(); + HyperLogLog hll = HyperLogLog.builder().build(); + hll.addLong(1); + bitVectors[1] = hll.serialize(); + hll = HyperLogLog.builder().build(); + hll.addLong(2); + hll.addLong(3); + hll.addLong(3); + hll.addLong(4); + bitVectors[0] = hll.serialize(); } private static interface Checker { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 97bf839ae1..16c440fc61 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -3396,7 +3396,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, ColStatistics.Range r = cs.getRange(); StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? 
null : r.maxValue.toString(), - cs.getNumNulls(), cs.getCountDistint(), cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses()); + cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses()); ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data); colStats = Collections.singletonList(cso); StatsSetupConst.setColumnStatsState(tblProps, colNames); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index aa77234c28..464f0b7cae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -695,38 +695,40 @@ private static void formatWithIndentation(String colName, String colType, String ColumnStatisticsData csd = cso.getStatsData(); if (csd.isSetBinaryStats()) { BinaryColumnStatsData bcsd = csd.getBinaryStats(); - appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(), + appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", bcsd.getAvgColLen(), bcsd.getMaxColLen(), "", ""); } else if (csd.isSetStringStats()) { StringColumnStatsData scsd = csd.getStringStats(); appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(), - scsd.getAvgColLen(), scsd.getMaxColLen(), "", ""); + scsd.getBitVectors() == null ? 
"" : scsd.getBitVectors(), scsd.getAvgColLen(), + scsd.getMaxColLen(), "", ""); } else if (csd.isSetBooleanStats()) { BooleanColumnStatsData bcsd = csd.getBooleanStats(); - appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "", + appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "", "", bcsd.getNumTrues(), bcsd.getNumFalses()); } else if (csd.isSetDecimalStats()) { DecimalColumnStatsData dcsd = csd.getDecimalStats(); appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(), + dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", ""); } else if (csd.isSetDoubleStats()) { DoubleColumnStatsData dcsd = csd.getDoubleStats(); appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(), - dcsd.getNumDVs(), "", "", "", ""); + dcsd.getNumDVs(), dcsd.getBitVectors() == null ? "" : dcsd.getBitVectors(), "", "", "", ""); } else if (csd.isSetLongStats()) { LongColumnStatsData lcsd = csd.getLongStats(); appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(), - lcsd.getNumDVs(), "", "", "", ""); + lcsd.getNumDVs(), lcsd.getBitVectors() == null ? "" : lcsd.getBitVectors(), "", "", "", ""); } else if (csd.isSetDateStats()) { DateColumnStatsData dcsd = csd.getDateStats(); appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), - dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", ""); + dcsd.getNumNulls(), dcsd.getNumDVs(), dcsd.getBitVectors() == null ? 
"" : dcsd.getBitVectors(), "", "", "", ""); } } else { - appendColumnStats(tableInfo, "", "", "", "", "", "", "", ""); + appendColumnStats(tableInfo, "", "", "", "", "", "", "", "", ""); } } @@ -779,11 +781,12 @@ private static void printPadding(StringBuilder tableInfo, int[] columnWidths) { } private static void appendColumnStats(StringBuilder sb, Object min, Object max, Object numNulls, - Object ndv, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) { + Object ndv, Object bitVector, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) { sb.append(String.format("%-" + ALIGNMENT + "s", min)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", max)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", numNulls)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", ndv)).append(FIELD_DELIM); + sb.append(String.format("%-" + ALIGNMENT + "s", bitVector)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", avgColLen)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", maxColLen)).append(FIELD_DELIM); sb.append(String.format("%-" + ALIGNMENT + "s", numTrues)).append(FIELD_DELIM); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java index 41a1c7a582..f2d2e2dc0b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -18,9 +18,6 @@ package org.apache.hadoop.hive.ql.plan; -import org.apache.hadoop.hive.ql.stats.StatsUtils; - - public class ColStatistics { private String colName; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java index d7a9888389..c413d16126 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java @@ -59,8 +59,8 @@ public void setPartSpec(Map partSpec) { */ private static final String schema = "col_name,data_type,comment#string:string:string"; private static final String colStatsSchema = "col_name,data_type,min,max,num_nulls," - + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment" - + "#string:string:string:string:string:string:string:string:string:string:string"; + + "distinct_count,bitVector,avg_col_len,max_col_len,num_trues,num_falses,comment" + + "#string:string:string:string:string:string:string:string:string:string:string:string"; public DescTableDesc() { } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 2d56950cb1..8ee41bfab2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -23,9 +23,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.common.ndv.FMSketch; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.common.ndv.fm.FMSketch; import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Description; diff --git a/ql/src/test/queries/clientpositive/bitvector.q b/ql/src/test/queries/clientpositive/bitvector.q new file mode 100644 index 0000000000..d8669f254b --- /dev/null +++ b/ql/src/test/queries/clientpositive/bitvector.q @@ -0,0 +1,3 @@ +set hive.mapred.mode=nonstrict; + +desc formatted src key; diff --git a/ql/src/test/queries/clientpositive/fm-sketch.q 
b/ql/src/test/queries/clientpositive/fm-sketch.q new file mode 100644 index 0000000000..6a65442076 --- /dev/null +++ b/ql/src/test/queries/clientpositive/fm-sketch.q @@ -0,0 +1,58 @@ +set hive.mapred.mode=nonstrict; +set hive.stats.ndv.algo=fm; + +create table n(key int); + +insert overwrite table n select null from src; + +explain analyze table n compute statistics for columns; + +analyze table n compute statistics for columns; + +desc formatted n key; + + +create table i(key int); + +insert overwrite table i select key from src; + +explain analyze table i compute statistics for columns; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key double); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key decimal); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key date); + +insert into i values ('2012-08-17'); +insert into i values ('2012-08-17'); +insert into i values ('2013-08-17'); +insert into i values ('2012-03-17'); +insert into i values ('2012-05-17'); + +analyze table i compute statistics for columns; + +desc formatted i key; + diff --git a/ql/src/test/queries/clientpositive/hll.q b/ql/src/test/queries/clientpositive/hll.q index edfdce8a29..91c4e788d3 100644 --- a/ql/src/test/queries/clientpositive/hll.q +++ b/ql/src/test/queries/clientpositive/hll.q @@ -1,5 +1,16 @@ set hive.mapred.mode=nonstrict; +create table n(key int); + +insert overwrite table n select null from src; + +explain analyze table n compute statistics for columns; + +analyze table n compute statistics for columns; + +desc formatted n key; + + create table i(key int); insert overwrite table i select key from src; diff --git a/ql/src/test/results/clientpositive/alterColumnStats.q.out 
b/ql/src/test/results/clientpositive/alterColumnStats.q.out index 519a62a190..6463fc610e 100644 --- a/ql/src/test/results/clientpositive/alterColumnStats.q.out +++ b/ql/src/test/results/clientpositive/alterColumnStats.q.out @@ -142,17 +142,17 @@ PREHOOK: Input: default@p POSTHOOK: query: desc formatted p c1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@p -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: desc formatted p c2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@p POSTHOOK: query: desc formatted p c2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@p -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c2 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c2 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} diff --git a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out index 672bd9f4bb..a315a6be39 100644 --- a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out +++ b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out @@ -65,9 +65,9 @@ PREHOOK: Input: default@p POSTHOOK: query: desc formatted p partition (c=1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@p -# col_name data_type comment - -a int from deserializer +# col_name data_type comment + +a int from deserializer PREHOOK: query: desc formatted p partition (c=1) PREHOOK: type: DESCTABLE PREHOOK: Input: default@p diff --git 
a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out index c0d4eeefb4..3c08133ff9 100644 --- a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out @@ -34,9 +34,11 @@ PREHOOK: Input: default@src_stat_part_one POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_one -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 16 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.72 3 from deserializer PREHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS POSTHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') @@ -47,9 +49,11 @@ PREHOOK: Input: default@src_stat_part_one POSTHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_one -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 11 2.2 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 11 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 2.2 3 from deserializer PREHOOK: query: create table src_stat_part_two(key string, value string) 
partitioned by (px int, py string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -86,9 +90,11 @@ PREHOOK: Input: default@src_stat_part_two POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_two -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 16 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.72 3 from deserializer PREHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS POSTHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') @@ -99,9 +105,11 @@ PREHOOK: Input: default@src_stat_part_two POSTHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part_two -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 30 1.72 40 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 30 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.72 40 from deserializer PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb @@ -124,9 +132,11 @@ PREHOOK: Input: default@src_stat_part_two POSTHOOK: query: describe formatted default.src_stat_part_two PARTITION(px=1, py='a') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
default@src_stat_part_two -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 40 1.72 50 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 40 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.72 50 from deserializer PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default diff --git a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out index 96dce1e2c5..4d2f738d79 100644 --- a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out +++ b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out @@ -123,30 +123,33 @@ PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable0 rename to statsdb1.testtable1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable0 @@ -199,30 +202,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type 
min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table testtable1 replace columns (col1 int, col2 string, col4 string) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: statsdb1@testtable1 @@ -274,30 +280,32 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 
POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: alter table testtable1 change col1 col1 string PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: statsdb1@testtable1 @@ -349,30 +357,31 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues 
num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -425,30 +434,31 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min 
max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: analyze table testpart0 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testpart0 @@ -549,27 +559,30 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted 
statsdb1.testpart0 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -616,27 +629,32 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De 
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: alter table statsdb1.testpart0 rename to statsdb1.testpart1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart0 @@ -735,27 +753,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len 
max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -802,27 +823,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls 
distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: alter table statsdb1.testpart1 partition (part = 'part1') rename to partition (part = 'part11') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: statsdb1@testpart1 @@ -922,27 +948,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# 
col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -989,27 +1018,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE 
POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: alter table statsdb1.testpart1 replace columns (col1 int, col2 string, col4 string) cascade PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: 
statsdb1@testpart1 @@ -1111,27 +1145,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1178,27 +1214,31 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') 
col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 change column col1 col1 string cascade PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: statsdb1@testpart1 @@ -1300,27 +1340,28 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -1367,27 +1408,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 rename to statsdb2.testpart2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart1 @@ -1446,54 +1489,57 @@ PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 
SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - 
-col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: use statsdb2 PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:statsdb2 @@ -1663,30 +1709,33 @@ PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 
POSTHOOK: query: describe formatted statsdb1.testtable0 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable0 rename to statsdb1.testtable1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable0 @@ -1739,30 +1788,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table testtable1 replace columns (col1 int, col2 string, col4 string) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: statsdb1@testtable1 @@ -1814,30 +1866,32 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max 
num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} PREHOOK: query: alter table testtable1 change col1 col1 string PREHOOK: type: ALTERTABLE_RENAMECOL 
PREHOOK: Input: statsdb1@testtable1 @@ -1889,30 +1943,31 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max 
num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -1965,30 +2020,31 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col4 
POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col4 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col4 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} PREHOOK: query: analyze table testpart0 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testpart0 @@ -2089,27 +2145,30 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 
'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -2156,27 +2215,32 @@ PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL 
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 POSTHOOK: query: describe formatted statsdb1.testpart0 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: alter table statsdb1.testpart0 rename to statsdb1.testpart1 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart0 @@ -2275,27 +2339,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + 
+col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2342,27 +2409,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max 
num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: alter table statsdb1.testpart1 partition (part = 'part1') rename to partition (part = 'part11') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: statsdb1@testpart1 @@ -2462,27 +2534,30 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2529,27 +2604,32 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: 
type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer PREHOOK: query: alter table statsdb1.testpart1 replace columns (col1 int, col2 string, col4 string) cascade PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: statsdb1@testpart1 @@ -2651,27 +2731,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE 
PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2718,27 +2800,31 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 change column col1 col1 string cascade PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: statsdb1@testpart1 @@ -2840,27 +2926,28 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + 
+col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -2907,27 +2994,29 @@ PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col4 POSTHOOK: type: 
DESCTABLE POSTHOOK: Input: statsdb1@testpart1 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: alter table statsdb1.testpart1 rename to statsdb2.testpart2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testpart1 @@ -2986,54 +3075,57 @@ PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part11') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col1 POSTHOOK: 
type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col1 string from deserializer +# col_name data_type comment + +col1 string from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testpart2 POSTHOOK: query: describe formatted statsdb2.testpart2 partition (part = 'part2') col4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testpart2 -# col_name data_type comment - -col4 string from deserializer +# col_name data_type comment + +col4 string from deserializer PREHOOK: query: use statsdb2 PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:statsdb2 diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out index 9cd9a8dbe0..6471292466 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -46,10 +46,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 16 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.72 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') @@ -60,10 +62,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 1111 1.111 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 1111 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.111 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') @@ -74,10 +78,10 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat value POSTHOOK: type: DESCTABLE 
POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 122 121 1.23 124 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 122 121 1.23 124 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_int @@ -92,10 +96,12 @@ PREHOOK: Input: default@src_stat_int POSTHOOK: query: describe formatted src_stat_int key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_int -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key double 66.0 406.0 10 15 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key double 66.0 406.0 10 15 SExM4A8PgZLrJoLyx3uBrPspvqnUPoHIoA/+prAWgPaQT4Du5BLDosR5vZLrGIDtbYDVh+QBwKHW +UIOz9UG+ouNE + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') @@ -106,10 +112,12 @@ PREHOOK: Input: default@src_stat_int POSTHOOK: query: describe formatted src_stat_int key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_int -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -key double 333.22 22.22 10 2222 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key double 333.22 22.22 10 2222 SExM4A8PgZLrJoLyx3uBrPspvqnUPoHIoA/+prAWgPaQT4Du5BLDosR5vZLrGIDtbYDVh+QBwKHW +UIOz9UG+ouNE + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb @@ -132,10 +140,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted default.src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 3333 2.222 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 3333 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 2.222 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.src_stat UPDATE STATISTICS for column value SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') @@ -146,10 +156,10 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted default.src_stat value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -value string 233 232 2.34 235 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 233 232 2.34 235 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default @@ -246,140 +256,140 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats s POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -s smallint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +s smallint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats i PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats i POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -i int from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +i int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats b PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment - -b bigint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +b bigint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats f PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats f POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -f float from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +f float from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats d PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d double from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +d double from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats dem PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dem POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dem decimal(10,0) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} 
+# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +dem decimal(10,0) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats ts PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ts timestamp from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +ts timestamp from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats dt PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dt date from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +dt date from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats str PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats str POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -str string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +str string 
from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats v PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v varchar(12) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +v varchar(12) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats c PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c char(5) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c char(5) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats bl PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bl POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bl boolean from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +bl boolean from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats bin PREHOOK: type: DESCTABLE 
PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bin POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bin binary from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +bin binary from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: DESC FORMATTED datatype_stats t PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats t POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -t tinyint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +t tinyint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column t SET ('numDVs'='232','numNulls'='233','highValue'='234','lowValue'='35') @@ -390,20 +400,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats t POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -t tinyint 35 234 233 232 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector 
avg_col_len max_col_len num_trues num_falses comment + +t tinyint 35 234 233 232 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats s PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats s POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -s smallint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +s smallint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column s SET ('numDVs'='56','numNulls'='56','highValue'='489','lowValue'='25') @@ -414,20 +424,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats s POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -s smallint 25 489 56 56 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +s smallint 25 489 56 56 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats i PREHOOK: type: DESCTABLE PREHOOK: Input: 
default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats i POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -i int from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +i int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column i SET ('numDVs'='59','numNulls'='1','highValue'='889','lowValue'='5') @@ -438,20 +448,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats i POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -i int 5 889 1 59 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +i int 5 889 1 59 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats b PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses 
comment - -b bigint from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +b bigint from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column b SET ('numDVs'='9','numNulls'='14','highValue'='89','lowValue'='8') @@ -462,20 +472,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b bigint 8 89 14 9 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +b bigint 8 89 14 9 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats f PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats f POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -f float from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# 
col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +f float from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column f SET ('numDVs'='563','numNulls'='45','highValue'='2345.656','lowValue'='8.00') @@ -486,20 +496,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats f POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -f float 8.0 2345.656 45 563 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +f float 8.0 2345.656 45 563 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats d PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d double from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector 
avg_col_len max_col_len num_trues num_falses comment + +d double from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column d SET ('numDVs'='5677','numNulls'='12','highValue'='560.3367','lowValue'='0.00455') @@ -510,20 +520,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d double 0.00455 560.3367 12 5677 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +d double 0.00455 560.3367 12 5677 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats dem PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dem POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dem decimal(10,0) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max 
num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +dem decimal(10,0) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dem SET ('numDVs'='57','numNulls'='912','highValue'='560','lowValue'='0') @@ -534,20 +544,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dem POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dem decimal(10,0) 0 560 912 57 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +dem decimal(10,0) 0 560 912 57 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats ts PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ts timestamp from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +ts timestamp from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column ts SET ('numDVs'='7','numNulls'='12','highValue'='1357030923','lowValue'='1357030924') @@ -558,20 +568,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ts timestamp 1357030924 1357030923 12 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +ts timestamp 1357030924 1357030923 12 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats dt PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# 
col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dt date from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +dt date from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column dt SET ('numDVs'='57','numNulls'='912','highValue'='2012-01-01','lowValue'='2001-02-04') @@ -582,20 +592,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats dt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -dt date 2001-02-04 2012-01-01 912 57 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +dt date 2001-02-04 2012-01-01 912 57 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: 
DESC FORMATTED datatype_stats str PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats str POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -str string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +str string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column str SET ('numDVs'='232','numNulls'='233','avgColLen'='2.34','maxColLen'='235') @@ -606,20 +616,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats str POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -str string 233 232 2.34 235 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +str string 233 232 2.34 235 from 
deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats v PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v varchar(12) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +v varchar(12) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column v SET ('numDVs'='22','numNulls'='33','avgColLen'='4.40','maxColLen'='25') @@ -630,20 +640,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v varchar(12) 33 22 4.4 25 from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +v varchar(12) 33 22 4.4 25 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats c PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c char(5) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c char(5) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column c SET ('numDVs'='2','numNulls'='03','avgColLen'='9.00','maxColLen'='58') @@ -654,20 +664,20 @@ PREHOOK: 
Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c char(5) 3 2 9.0 58 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c char(5) 3 2 9.0 58 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats bl PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bl POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bl boolean from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +bl boolean from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} 
PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bl SET ('numNulls'='1','numTrues'='9','numFalses'='8') @@ -678,20 +688,20 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bl POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bl boolean 1 9 8 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +bl boolean 1 9 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: DESC FORMATTED datatype_stats bin PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bin POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bin binary from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type 
min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +bin binary from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} PREHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE default.datatype_stats UPDATE STATISTICS for column bin SET ('numNulls'='8','avgColLen'='2.0','maxColLen'='8') @@ -702,7 +712,7 @@ PREHOOK: Input: default@datatype_stats POSTHOOK: query: DESC FORMATTED datatype_stats bin POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bin binary 8 2.0 8 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +bin binary 8 2.0 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"bin\":\"true\",\"bl\":\"true\",\"c\":\"true\",\"d\":\"true\",\"dem\":\"true\",\"dt\":\"true\",\"f\":\"true\",\"i\":\"true\",\"s\":\"true\",\"str\":\"true\",\"t\":\"true\",\"ts\":\"true\",\"v\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out index 6a3fbc0cc7..4d329faaea 100644 --- 
a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -48,9 +48,11 @@ PREHOOK: Input: default@src_stat_part POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 16 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.72 3 from deserializer PREHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key, value PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part @@ -69,18 +71,22 @@ PREHOOK: Input: default@src_stat_part POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 16 1.72 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 16 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + 1.72 3 from deserializer PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) value PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_stat_part POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 19 4.92 7 from deserializer +# col_name data_type min max 
num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 19 SExM4BMTgaTbFcCikRTAp44YwK72BIGdshzAtN4dgfC7Ab6ikDTAz6JGgejDCP+AlzSA84UvwYTL +Wr+ivynA6+uCAsDjm8kBgri1Ab++nA+/vawa + 4.92 7 from deserializer PREHOOK: query: create table src_stat_string_part(key string, value string) partitioned by (partitionName string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out index e3abba5bd0..a8329d10ae 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out @@ -192,10 +192,11 @@ PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 1 4 0 4 from deserializer +col_name data_type min max num_nulls distinct_count bitvector avg_col_len max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a int 1 4 0 4 SExM4AQExfO+SLy7rGKA4vdMwPD8wQI= + from deserializer PREHOOK: query: alter table partitioned1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@partitioned1 @@ -434,10 +435,11 @@ PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=2) c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment - -c int 10 40 0 4 from deserializer +col_name data_type min max num_nulls distinct_count bitvector avg_col_len max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c int 10 40 0 4 SExM4AQEguSTlQGB4f34Ab/okIMC/4XTfQ== + from deserializer PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') PREHOOK: type: QUERY POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') @@ -626,17 +628,19 @@ PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 1 6 0 4 from deserializer +col_name data_type min max num_nulls distinct_count bitvector avg_col_len max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a int 1 6 0 6 SExM4AYGwZn6L4TaxBi8u6xigOL3TMCSiwGA3vHAAg== + from deserializer PREHOOK: query: desc formatted partitioned1 partition(part=1) c PREHOOK: type: DESCTABLE PREHOOK: Input: default@partitioned1 POSTHOOK: query: desc formatted partitioned1 partition(part=1) c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 -col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c int 100 200 0 2 from deserializer +col_name data_type min max num_nulls distinct_count bitvector avg_col_len 
max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c int 100 200 0 2 SExM4AICweD/2gaAj/YU + from deserializer diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out index 06f23b1e7c..e84875b55b 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out @@ -250,17 +250,59 @@ PREHOOK: Input: default@dest_j1 POSTHOOK: query: desc formatted dest_j1 key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest_j1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb +YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO +vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi +AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe 
+A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ +we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD +gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl +Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA +t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: desc formatted dest_j1 value PREHOOK: type: DESCTABLE PREHOOK: Input: default@dest_j1 POSTHOOK: query: desc formatted dest_j1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest_j1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.834630350194552 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 
+xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.834630350194552 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out b/ql/src/test/results/clientpositive/avro_decimal.q.out index e1045ebea1..aa5a68cbf1 100644 --- a/ql/src/test/results/clientpositive/avro_decimal.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal.q.out @@ -32,10 +32,11 @@ PREHOOK: Input: default@dec POSTHOOK: query: DESC FORMATTED `dec` value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value decimal(8,4) -12.25 234.79 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector 
avg_col_len max_col_len num_trues num_falses comment + +value decimal(8,4) -12.25 234.79 0 10 SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/avro_decimal_native.q.out index b73b5f5679..8331aead03 100644 --- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out @@ -36,10 +36,11 @@ PREHOOK: Input: default@dec POSTHOOK: query: DESC FORMATTED `dec` value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value decimal(8,4) -12.25 234.79 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value decimal(8,4) -12.25 234.79 0 10 SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec diff --git a/ql/src/test/results/clientpositive/bitvector.q.out b/ql/src/test/results/clientpositive/bitvector.q.out new file mode 100644 index 0000000000..df0a36ef7d --- /dev/null +++ b/ql/src/test/results/clientpositive/bitvector.q.out @@ -0,0 +1,31 @@ +PREHOOK: query: desc formatted src key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src +POSTHOOK: query: desc formatted src key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len 
max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/char_udf1.q.out b/ql/src/test/results/clientpositive/char_udf1.q.out index fefc7407e0..e701d64357 100644 --- a/ql/src/test/results/clientpositive/char_udf1.q.out +++ 
b/ql/src/test/results/clientpositive/char_udf1.q.out @@ -406,7 +406,7 @@ from char_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} PREHOOK: query: select min(c2), min(c4) diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out index 0f2822504f..367844b867 100644 --- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out +++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out @@ -41,20 +41,22 @@ PREHOOK: Input: default@all_nulls POSTHOOK: query: describe formatted all_nulls a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a bigint 0 0 5 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a bigint 0 0 5 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: describe formatted 
all_nulls b PREHOOK: type: DESCTABLE PREHOOK: Input: default@all_nulls POSTHOOK: query: describe formatted all_nulls b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b double 0.0 0.0 5 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +b double 0.0 0.0 5 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: drop table all_nulls PREHOOK: type: DROPTABLE PREHOOK: Input: default@all_nulls diff --git a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out index fb833bccb2..040aa13b7b 100644 --- a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out +++ b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out @@ -48,10 +48,10 @@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + + left string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: insert into space values ("1", "2", "3") 
PREHOOK: type: QUERY PREHOOK: Output: default@space @@ -67,10 +67,11 @@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string 0 1 1.0 1 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + + left string 0 1 SExM4AEBxbi8+AQ= + 1.0 1 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: select * from space PREHOOK: type: QUERY PREHOOK: Input: default@space diff --git a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out index 9925928da7..bc6c7c25e9 100644 --- a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out +++ b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out @@ -173,17 +173,19 @@ PREHOOK: Input: default@dest1 POSTHOOK: query: desc formatted DEST1 key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 10 10 0 1 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 10 10 0 1 SExM4AEBg8WRjgM= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: desc formatted DEST1 value PREHOOK: type: DESCTABLE 
PREHOOK: Input: default@dest1 POSTHOOK: query: desc formatted DEST1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 1 SExM4AEBg7CVmgY= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 5ecb20501b..7ec1b8121b 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -509,18 +509,20 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -employeeName string 1 12 4.3076923076923075 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeName string 1 12 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + 4.3076923076923075 6 from deserializer PREHOOK: query: explain analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY @@ -598,18 +600,20 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: explain analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY @@ -679,10 +683,11 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted 
Employee_Part employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 2 12 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 34 2 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb @@ -711,10 +716,11 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} PREHOOK: query: analyze table default.Employee_Part compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@employee_part diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index a64c76badf..3bbec88f3f 100644 --- 
a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -141,9 +141,10 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='4000.0', country='USA') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 7 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeName string 0 7 SExM4AcHhN+NPL2pzAqA8p0tgLvPcIPS5KcCvbS+dMC7gIYB + 5.142857142857143 6 from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID PREHOOK: type: QUERY @@ -221,18 +222,20 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='USA') employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -employeeID int 16 31 0 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 31 0 7 SExM4AcHw4SxaoX10lX5nt07xsfQ5AH8u4h+gtXeeb2uipsB + from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID PREHOOK: type: QUERY @@ -318,9 +321,10 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='3000.0', country='UK') employeeID POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeID int 16 34 1 12 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeID int 16 34 1 12 SExM4AwMgZroaMLqyAGF9dJV+rKWOf/rxgKAgPo7xsfWqAH8u4h+//bMRoPekTO9roqbAcCI5ns= + from deserializer PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns PREHOOK: type: QUERY @@ -406,9 +410,10 @@ PREHOOK: Input: default@employee_part POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary='3500.0', country='UK') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 12 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeName string 0 12 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + 5.142857142857143 6 from deserializer PREHOOK: query: drop table Employee PREHOOK: type: DROPTABLE POSTHOOK: query: drop table Employee @@ -481,9 +486,10 @@ 
PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='3500.0', country='UK') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 12 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeName string 0 12 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + 5.142857142857143 6 from deserializer PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -528,9 +534,10 @@ PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='3000.0', country='USA') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 12 5.142857142857143 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeName string 0 12 SExM4AwMhN+NPL2pzAqA8p0tgKf/ZoCU0AnAwotPw4/Z2AG9tL50wLuAhgHAmduBAcG66mL//JYR + 5.142857142857143 6 from deserializer PREHOOK: query: alter table Employee add columns (c int ,d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@employee @@ -564,24 +571,27 @@ PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') employeeName POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -employeeName string 0 9 
4.777777777777778 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +employeeName string 0 9 SExM4AkJhN+NPL2pzAqA8p0tgLvPcIPS5KcCvbS+dMC7gIYBwJnbgQGAz/1W + 4.777777777777778 6 from deserializer PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') c PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -c int 2000 4000 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +c int 2000 4000 0 3 SExM4AMDwpKn6wH/9JpogbzaCQ== + from deserializer PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d PREHOOK: type: DESCTABLE PREHOOK: Input: default@employee POSTHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@employee -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -d string 0 2 2.4444444444444446 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +d string 0 2 SExM4AICgaD/7QKE/4mqAw== + 2.4444444444444446 3 from deserializer diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 91c8f150a2..1acfc60b04 100644 --- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -285,30 +285,39 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: 
describe formatted UserVisits_web_text_none destURL POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -destURL string 0 55 48.945454545454545 96 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +destURL string 0 55 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA +ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH +/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA +vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K + 48.945454545454545 96 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: describe formatted UserVisits_web_text_none adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# 
col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b +Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -avgTimeOnSite int 1 9 0 9 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE empty_tab( a int, b double, @@ -414,10 +423,14 @@ 
PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: describe formatted default.UserVisits_web_text_none destURL POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -destURL string 0 55 48.945454545454545 96 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +destURL string 0 55 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA +ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH +/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA +vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K + 48.945454545454545 96 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( sourceIP string, destURL string, @@ -707,30 +720,39 @@ PREHOOK: Input: dummydb@uservisits_in_dummy_db POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -destURL string 0 55 48.945454545454545 96 from deserializer -COLUMN_STATS_ACCURATE 
{\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +destURL string 0 55 SExM4Dc3gddnwYLHAsC19xX/4qoLgdjABMSO/kb8t5ELgsyUGb2kwBPAigWA2YICwJDHAsD9qkjA +ptZKwJKdAcHjohi/yjvCzEu+h8IWwYngGsHl7i6+zboChK7WC7z2kQTAmKEZweOqB/+K7zPE+LIH +/JOmGcDr9BjBqsQIwKOPCv/cvwHB1bMW/7jgFICW5gaAuv4IgICNCIOopRq+0IMD/8nJLsDrlwKA +vN4lhfvmCv/49gf8n6kBwLKKBID89gfA/Y4kgKjeNMCAgRHAmYwFxI7hE4H09wf8x+4K + 48.945454545454545 96 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b 
+Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: dummydb@uservisits_in_dummy_db -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -avgTimeOnSite int 1 9 0 9 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db PREHOOK: type: DROPTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db diff --git a/ql/src/test/results/clientpositive/compustat_avro.q.out b/ql/src/test/results/clientpositive/compustat_avro.q.out index 2f8dc10e50..5075f2bbae 100644 --- 
a/ql/src/test/results/clientpositive/compustat_avro.q.out +++ b/ql/src/test/results/clientpositive/compustat_avro.q.out @@ -30,10 +30,10 @@ PREHOOK: Input: default@testavro POSTHOOK: query: describe formatted testAvro col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testavro -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3 PREHOOK: type: QUERY PREHOOK: Input: default@testavro @@ -48,7 +48,7 @@ PREHOOK: Input: default@testavro POSTHOOK: query: describe formatted testAvro col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testavro -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 string 0 0 0.0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 string 0 0 0.0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out 
b/ql/src/test/results/clientpositive/compute_stats_date.q.out index 5cd2180108..0c5915cac3 100644 --- a/ql/src/test/results/clientpositive/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -109,10 +109,12 @@ PREHOOK: Input: default@tab_date POSTHOOK: query: describe formatted tab_date fl_date POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tab_date -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -fl_date date 2000-11-20 2010-10-29 0 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +fl_date date 2000-11-20 2010-10-29 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy +/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} PREHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') @@ -123,7 +125,9 @@ PREHOOK: Input: default@tab_date POSTHOOK: query: describe formatted tab_date fl_date POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tab_date -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -fl_date date 1970-01-01 2015-01-01 0 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +fl_date date 1970-01-01 2015-01-01 0 19 SExM4BMTw6qAFv+ogCGC/7ZdgMDTH73K3+4Bgq+jE766tgWAh/xZgIqTVIDhgVDA655SwfXHA4Dy +/Ve//Z0LwMSIToCZ6QOAhZ8Gg8jOEb38rBw= + from 
deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out index fcfce78b82..e18b989062 100644 --- a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 18) from tab_decimal POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_decimal #### A masked pattern was here #### -{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}{0, 1, 2, 3, 5}{0, 1, 3}{0, 1, 2, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 6, 8}{0, 1, 2, 3}{0, 1, 2}{0, 1, 4, 5}"} +{"columntype":"Decimal","min":-87.2,"max":123456789012345678901234567890.123,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"Rk0SAB8AAAAvAAAADwAAAAcAAAAHAAAALwAAAAsAAAAXAAAALwAAAA8AAAAHAAAAAwAAAAcAAAAP\r\nAAAARwEAAA8AAAAHAAAAMwAAAA==\r\n"} diff --git a/ql/src/test/results/clientpositive/compute_stats_double.q.out b/ql/src/test/results/clientpositive/compute_stats_double.q.out index e6a087dd98..d937c3a002 100644 --- a/ql/src/test/results/clientpositive/compute_stats_double.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_double.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_double POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_double #### A masked pattern was here #### -{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2}{0, 1}{0, 1, 3, 4}{0, 1, 3}{0, 1, 2, 3, 8}{0, 1, 3}{0, 1, 2}{0, 1, 4}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2, 3, 4}{0, 1, 2}{0, 1, 2, 3, 4}{0, 1, 3}"} 
+{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"Rk0QAB8AAAAHAAAAAwAAABsAAAALAAAADwEAAAsAAAAHAAAAEwAAAAcAAAAPAAAADwAAAB8AAAAH\r\nAAAAHwAAAAsAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/compute_stats_long.q.out b/ql/src/test/results/clientpositive/compute_stats_long.q.out index fb985d8266..3451072a1b 100644 --- a/ql/src/test/results/clientpositive/compute_stats_long.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_long.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_int POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### -{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3}{0, 2, 5}{0, 1, 2, 3, 4}{0, 1, 2, 4, 6, 7}{0, 1, 2, 4}{0, 1, 2, 4, 5}{0, 1, 2, 5}{0, 1, 2}{0, 1, 2, 3}{0, 1, 3, 4}{0, 1, 2, 5, 6}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 2, 3, 10}{0, 1, 2, 4}"} +{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"Rk0QAA8AAAAlAAAAHwAAANcAAAAXAAAANwAAACcAAAAHAAAADwAAABsAAABnAAAADwAAAAsAAAAP\r\nAAAADwQAABcAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/compute_stats_string.q.out b/ql/src/test/results/clientpositive/compute_stats_string.q.out index a5d66eba31..bbb236150e 100644 --- a/ql/src/test/results/clientpositive/compute_stats_string.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_string.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 'fm', 16) from tab_string POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_string #### A masked pattern was here #### -{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"{0, 1, 2, 3}{0, 1}{0, 1, 3}{0, 2}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 3}{0, 1}{0, 1}{0, 1, 2, 4}{0, 1, 4}{0, 2, 4}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}"} 
+{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"Rk0QAA8AAAADAAAACwAAAAUAAAAPAAAACwAAAA8AAAALAAAAAwAAAAMAAAAXAAAAEwAAABUAAAAP\r\nAAAABwAAAAcAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index 5593e422b6..ec841ff241 100644 --- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -14,10 +14,31 @@ PREHOOK: Input: default@src POSTHOOK: query: describe formatted src key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ 
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended src1 PREHOOK: type: DESCTABLE PREHOOK: Input: default@src1 @@ -34,10 +55,12 @@ PREHOOK: Input: default@src1 POSTHOOK: query: describe formatted src1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 19 4.92 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 19 SExM4BMTgaTbFcCikRTAp44YwK72BIGdshzAtN4dgfC7Ab6ikDTAz6JGgejDCP+AlzSA84UvwYTL +Wr+ivynA6+uCAsDjm8kBgri1Ab++nA+/vawa + 4.92 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended src_json PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_json @@ -53,10 +76,11 @@ PREHOOK: Input: default@src_json POSTHOOK: query: describe formatted src_json json POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: default@src_json -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -json string 0 1 644.0 644 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"json\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +json string 0 1 SExM4AEBhZK/6AY= + 644.0 644 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"json\":\"true\"}} PREHOOK: query: describe extended src_sequencefile PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_sequencefile @@ -73,10 +97,31 @@ PREHOOK: Input: default@src_sequencefile POSTHOOK: query: describe formatted src_sequencefile value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_sequencefile -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG 
+gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcbucket PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket @@ -93,10 +138,38 @@ PREHOOK: Input: default@srcbucket POSTHOOK: query: describe formatted srcbucket value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 430 6.802 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 430 SExM4K4DrgPM7Sz2soMHgPgNw751/KEegclIgPbqBb/h5wOAnfoBgsWzBb7bTcD0a4CqkQPA7mKB +osYB/6nkAcHVGcHekwOA7wS/u44D/7c4wJvkAYLQxAL/7acDgLXtAb/E0QOB1IQGwKwcv76qAsCV +swLAzlqA4JsEgcNMv9SGCcDHgAWC2Ei+18ACwLCLBcCdmAGBuSGAhBmFr5AB+tCzAcGDswGC8n39 
+6cQDwPDhA8GT1AG/klbAwtYBgvP0Av7EB4GRvQL/0voFgeMggv1uwFj9i2iC06gBgJQH/rXFAYGC +lwGAijqB5oEB/5DfDP+2RYGo0gP/qKUGgNqHBoKL0AK+3BaA7gvBwq0Ev4j7BcLbmQG+8sQFgLx/ +gIXzAYDdjQHC5rIC/+OKAf/vRsD+BsK12AK+3iDCj6cDwcuXAoC1Bb6hYL+52ATAhvEDgKkLgPWj +AoDJ9wSCiY0B/o/EBID8iALB/12/4poGwL4Sw7JgvdijAcHnqgP/obsCw4ZzveelAYKYVb7VSoCo +/wHA6VjAjJgBwfSUAoD3PMPPygKAsBeAwxO84LoCgYc9v5mVAcDaLsH7kALA+SH/wdQCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7+rASAzogCgfKyAv/esgHA5SKAog3BlS6/4ckE +wJYOgsI1/qWdAYCxlQPAndwBwPaSAoPN3gH9oMcFgaefAcHzlwP/xdQCv45EwK+ZDMCAmAGBkA+/ +xOUEgZXHAcGHzgT+hdkBw4QvvqmlA/+0mAGA5qwDw7Mkv7edBv6CpgLAj/sCwIV1gf2zBoSuiAH7 +pMMDwvHQAv69yQOC1Ez+wbkCgIngA4Hy3Aa/6LgBwIUMwK1XgIilBsCX9grBvakCv6m4AYGp7wKB +nni/1fkE/98ogNojgJTZBoCWkwTAkPUCgablA/+ApwPA2JoCh8K0AbmZqQKB5kCB9fcCv5bvBMGd +gQP+0F2AvpQEgeChCr/m1AjA5pEBgMWAA8D42wLCwMkB/oMgwYTTBIC0jAT/oD2CvK4C/rQ5gMym +BcPGyQS+1XTDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8ClnALCsOIC/s2t +Af/JsQLAmWCA+akCgrq2A77LxgPBpc4Hv9urBMKGxAT/3o4B/6abB8CL+wTA4qcKgeqxAcDugwK/ +nLICgsOsAv7NyQHA/f4CwOKFA4DygQKA7IwFgIiYAoC23QGAuR3B+yHAqQ6/xRWBm9EBhPaFAby9 +jgO/rbwEwuDXBr7HtAHA+1HAsxfAy6IDhMahBLyaSIHHSf+RyQGAjkvAtxrAt5oDwsiOA4CiDb6m +0gHCt8kCv81ww8V+vseFCsKclgb+5/AB/pWuB8LwUr+u6gKB8Z8BvqwYwOqBAYSbVPzW2QKC8osD +/v7KA8D+eYC5A8GYygLEpZQLu/fNAoDIasPXG8DAOL3ZNYGhF8Cj9gWCl4IB/YFvgeecCIHegAH/ +mosB/6lchIQe/qP+BYChKv6SigGAjH3AtakDg78k/frBAYHGygHA8uQB/8LtAYGulAK/3tcDwZ7H +Ar/UvgWB6Sz/4OICwfyQAf/X9wWBvx+AowGAxY8CgY5j/rVegekTgMQ4v7ApwIryAsDSmQGB+JkD +xcCxAfqrngKA7O0CgKgZhLXJAsCEDP3qS//G5ATBiqQDgcGkAcDijQT+w44BgMCUCMD+HcCY/QHB +togBgM8pgrZ3gNMTvuRLv6JgwpJC/sHNAcK0uQL+vzSA45YEwfqvAoDG/AH/4rwDgrfRA77HpwaB +2Ee/mMcHgafGAr+mzwfBs6sCwdpvgO8X/qq9CIGNpAL/uT+ApoACwIifCoDZBoKwRv62hALA7YcE +gKF2go9LgKqIBL6u4gSB6UWFxmr6+kHByq8O/8TAAcTpsQeA9hv87TuCuLUBv/eKAYCX+AHAhQ7/ +/8kBws2sA8GtgQK+r5MFwfjTBL7DQMGH1gLA1YECgoGcCf2WF8Dc7QKC9/wC/riiAcHXngPDmI8B +/JhghYepB7zEpgP/hReD5z29musBgYCDAYDUF8DgkwbA5vUBv5Ye + 6.802 7 from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcbucket2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket2 @@ -113,10 +186,31 @@ PREHOOK: Input: default@srcbucket2 POSTHOOK: query: describe formatted srcbucket2 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM 
+fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe extended srcpart PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart @@ -141,9 +235,30 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted srcpart PARTITION (ds="2008-04-09", hr="12") key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK 
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer PREHOOK: query: describe extended alltypesorc PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc @@ -170,37 +285,59 @@ PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc ctinyint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ctinyint tinyint -64 62 3115 127 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +ctinyint tinyint -64 62 3115 127 SExM4H9/wffjBsSrxgHA1wz9+IYHg+DQCP3JihW/hsoBwPWWAcCpzQaAka8Nguq/AsK1iAL87ZUO +gJNyw+PsDL27kQLAhqMCwurIAb6YtwuCsqkM/uPMAoHrwgqAxvIFv4+0BsPOQb6yowG/ur4Nx8qY 
+FbyohgT+yXaA6b0C/8ydAYCV8gKA2pQswJKLAYGJLP/rxgKAk/YDhfPdELvbjQTB5+wEv5WXE8Ch +lAuHqL8IupGWB4O/f76/iwK+iKEJwaPQAf+XygGCnp4fv/WnBcCSzg/AgSSBieICv7yhBb/FvwnC +oMIVv+uBKYWN+wT6/KIGxLiuAf3/uAPCqdEIvabTBYHXviP//KQEgKuDCcGV3Ai/h7wGwdqmA4DO +pwaB1+kGgJuiFP6GugLB6sYcgNvQAb+vZMHalQX/jPYBgaebAoCHiwj/8NQIgeajCb+mUYHomgGA +uLEa/+yOBoTZwAK8pmPAwvUCgNSdB4SjmgW86+QGgZWyAsLt0wL/26AG/9OGDf+a0QSCvZgGvp/J +AYKB0AK+4ZoFwMqsA8CF8QqAy5cCgMjKDoDasB7A3/wYgoa7BP6VxASCvrwD/p2zAoGivgLAhPwX +/+7aD8Hztw7AkIsE/8CDC4GHyxT/zd0EwaXNBcCM4hP/qMsBgaClAr+48AbCqqEI + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc cfloat PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc cfloat POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cfloat float -64.0 79.5530014038086 3115 131 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cfloat float -64.0 79.5530014038086 3115 131 SExM4IMBgwGCipYJ/7xdxIybDoK1gwiAsCS79s8MgIafCsHkghfAg+AHvrH2AYHVhgfDuPwIwIsU +voWVAr/C+AiB5IADvtnDCYC79RPA2PkIwozYBb6JtAGA0owCgJ7bBMLTyQ/+j8UHweetA4C3Nb/e 
+hwaAh7oRwOdSxvHFAvzIpAK+w44ExIbjEb6o7AH/kYodgYKOB//70wy/wdUCwPehFcTdqAX/wuML +vY68B4b06QS7m/4GwL3bB7/RxQOB0twCgLoTw5/mAsS/6we59YkDwd2GC8GD7gW9xLgHwdrsA4Wx +fP3L7Au9usENgM/tDYLx4Ar+n7MJho+kBPrh2ROAs+4Cwc+0CoKbrAP+2f8Ev8mtFcC6lgWAivUC +wKH3B4ff0RH66u0OwM/7C4PflA29yYUBgIyFBIHt3QzB2YsK/aDFEoCT4QPA2bYVwNXRB8Gc0gO/ +xLcMwea/A8GajwP+q58Cga3hCv/vnwTD0LsE/fvlBoCDb8GBygWC+/oK/oaXBcC+CIOT3wb+wu8B +/8j9FMCM9A3/wPgDgqybAb65xAKAm/ICwNErgIbqCsCwiQfAy98IwPumAYTJrwT8i6ECgo/rA4KX +3AmA0A/85FDB0qsCgc2RA4Cw6g/+3+EGgZSjEsC/8An/+cwbwvjlBcCv4Qe+8tILwIDhAYGDxgvA +q6ECv6q2CQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc ctimestamp1 PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc ctimestamp1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ctimestamp1 timestamp -30 31 3115 35 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +ctimestamp1 timestamp -30 31 3115 35 SExM4CMjxdOOGLzQ1BbA9ZYBwKnNBoCRrw3En8gEvKmpLYCSgzWA4vdMwJKLAcGuwk7AqvwNwcjk +OMLQ8Vn9/7gDgKfjMcC9hBa/h7wGwajOCYHyixu+m7FqwML1AoCwmzCAj+odgMuXAoDIyg7Aua03 
+gJz/CIDc7wWBor4Cv/PWJ4GEwxKAyNs9/6jLAYKDtxE= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} PREHOOK: query: describe formatted alltypesorc cboolean2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@alltypesorc POSTHOOK: query: describe formatted alltypesorc cboolean2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cboolean2 boolean 3115 3983 5190 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cboolean2 boolean 3115 3983 5190 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out index f58a7cc8e1..204cd161f7 100644 --- a/ql/src/test/results/clientpositive/decimal_stats.q.out +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -46,10 +46,11 @@ PREHOOK: Input: default@decimal_1 POSTHOOK: query: desc 
formatted decimal_1 v POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@decimal_1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -v decimal(10,0) 500 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\",\"u\":\"true\",\"v\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +v decimal(10,0) 500 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"t\":\"true\",\"u\":\"true\",\"v\":\"true\"}} PREHOOK: query: explain select * from decimal_1 order by t limit 100 PREHOOK: type: QUERY POSTHOOK: query: explain select * from decimal_1 order by t limit 100 diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 1bae859e2c..747fc154b0 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -72,10 +72,10 @@ PREHOOK: Input: default@testdeci2 POSTHOOK: query: describe formatted testdeci2 amount POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -amount decimal(10,3) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +amount decimal(10,3) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git a/ql/src/test/results/clientpositive/describe_syntax.q.out b/ql/src/test/results/clientpositive/describe_syntax.q.out index 19147a1d92..435a20c00e 100644 --- 
a/ql/src/test/results/clientpositive/describe_syntax.q.out +++ b/ql/src/test/results/clientpositive/describe_syntax.q.out @@ -211,10 +211,10 @@ PREHOOK: Input: db1@t1 POSTHOOK: query: DESCRIBE FORMATTED t1 key1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: db1@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key1 int from deserializer -COLUMN_STATS_ACCURATE {} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key1 int from deserializer +COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE db1.t1 key1 PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 @@ -235,10 +235,10 @@ PREHOOK: Input: db1@t1 POSTHOOK: query: DESCRIBE FORMATTED db1.t1 key1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: db1@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key1 int from deserializer -COLUMN_STATS_ACCURATE {} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key1 int from deserializer +COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE t1 key1 PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 @@ -259,10 +259,10 @@ PREHOOK: Input: db1@t1 POSTHOOK: query: DESCRIBE FORMATTED t1 key1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: db1@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key1 int from deserializer -COLUMN_STATS_ACCURATE {} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key1 int from deserializer +COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE t1 PARTITION(ds='4', part='5') PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 diff --git a/ql/src/test/results/clientpositive/describe_table.q.out b/ql/src/test/results/clientpositive/describe_table.q.out index 3ba9a7b942..368e7626e8 100644 --- 
a/ql/src/test/results/clientpositive/describe_table.q.out +++ b/ql/src/test/results/clientpositive/describe_table.q.out @@ -210,10 +210,31 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted srcpart key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC 
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: describe formatted srcpart PARTITION(ds='2008-04-08', hr='12') PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart @@ -302,10 +323,31 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted `srcpart` `key` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J 
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: describe formatted `srcpart` PARTITION(ds='2008-04-08', hr='12') PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart @@ -352,20 +394,20 @@ PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted `srcpart` `ds` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -ds string 0 2 100.0 100 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ds\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +ds string 0 2 100.0 100 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ds\":\"true\"}} PREHOOK: query: describe formatted `srcpart` `hr` PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcpart POSTHOOK: query: describe formatted `srcpart` `hr` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcpart -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -hr string 0 2 100.0 100 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"hr\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +hr string 0 2 100.0 
100 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"hr\":\"true\"}} PREHOOK: query: create table srcpart_serdeprops like srcpart PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index 73d4cd7660..7aae751767 100644 --- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -51,9 +51,9 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer PREHOOK: query: explain analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -242,30 +242,39 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string 0 55 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB 
+wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + 12.763636363636364 13 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -avgTimeOnSite int 1 9 0 9 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +adRevenue float 13.099044799804688 492.98870849609375 0 55 
SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b +Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: CREATE TABLE empty_tab( a int, b double, @@ -292,10 +301,10 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: explain analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY @@ -361,20 +370,20 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 0 0 0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a int 0 0 0 0 from deserializer +COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: desc formatted empty_tab b PREHOOK: type: DESCTABLE PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b double 0.0 0.0 0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +b double 0.0 0.0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: CREATE DATABASE test PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:test @@ -451,28 +460,32 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max 
num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string 0 55 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB +wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + 12.763636363636364 13 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none @@ -495,17 +508,25 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len 
max_col_len num_trues num_falses comment + +sKeyword string 0 54 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + 7.872727272727273 19 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sKeyword string 0 54 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + 7.872727272727273 19 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out index 1096e9fc64..78511ad62d 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out @@ -59,20 +59,62 @@ PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table 
key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb +YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO +vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi +AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe +A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ +we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD +gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl +Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA 
+t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: DESCRIBE FORMATTED encrypted_table value PREHOOK: type: DESCTABLE PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ 
+ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO encrypted_db.encrypted_table_2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: default@encrypted_table @@ -92,20 +134,62 @@ PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb +YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO +vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi 
+AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe +A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ +we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD +gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl +Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA +t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: DESCRIBE FORMATTED encrypted_table value PREHOOK: type: DESCTABLE PREHOOK: Input: default@encrypted_table POSTHOOK: query: DESCRIBE FORMATTED encrypted_table value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@encrypted_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF 
+r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: ALTER TABLE default.encrypted_table RENAME TO default.plain_table PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: default@encrypted_table diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index b212da907b..eff8774841 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ 
b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -81,9 +81,10 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 0.75 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwMOJoQQ= + 0.75 2 from deserializer PREHOOK: query: explain extended select state from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state from loc_orc_1d diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index b5f4feede0..48ee0759b8 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -89,18 +89,20 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 0.75 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwMOJoQQ= + 0.75 2 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -state string 0 6 3.0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 6 SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7 + 3.0 3 from deserializer PREHOOK: query: explain extended select state from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state from loc_orc_1d @@ -296,12 +298,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -499,12 +501,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state diff --git a/ql/src/test/results/clientpositive/fm-sketch.q.out b/ql/src/test/results/clientpositive/fm-sketch.q.out new file mode 100644 index 0000000000..0028d48546 --- /dev/null +++ b/ql/src/test/results/clientpositive/fm-sketch.q.out @@ -0,0 +1,333 @@ +PREHOOK: query: create table n(key int) +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@n +POSTHOOK: query: create table n(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@n +PREHOOK: query: insert overwrite table n select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@n +POSTHOOK: query: insert overwrite table n select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@n +POSTHOOK: Lineage: n.key EXPRESSION [] +PREHOOK: query: explain analyze table n compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table n compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: n + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'fm', 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.n + +PREHOOK: query: analyze table n compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@n +#### A masked pattern was here #### +POSTHOOK: query: analyze table n compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@n +#### A masked pattern was here #### +PREHOOK: query: desc formatted n key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@n +POSTHOOK: query: desc formatted n key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@n +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 0 500 1 Rk0QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: create table i(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain analyze table i compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table i compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + 
TableScan + alias: i + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'fm', 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.i + +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 196 Rk0QAP8YAAB/AAAA/woAAP8AAAC/AQAA/wEAAH8BAAD/AgAAfwAAAPsLAAB/AgAA/wgAAH9DAAA/ 
+AAAA/xQAAP8DAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key double 0.0 498.0 0 234 Rk0QAP8AAAD/AQAA/wAAAJ8NAAB/MAAA/xEAAP8CAAD/AgAAfwIAAP8AAAB/EQAA/wAAAP8AAAB/ +AAAA3wEAAP8CAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i 
+POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key decimal) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key decimal(10,0) 0 498 0 180 Rk0QAP8AAAD/AwAA/wUAAP8DAAD/AwAAvwIAAH8eAAC/AQAAPwAAAL8AAAAHAAAAvwAAAP0CAAD/ +AQAA/wMAAH8CAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key date) 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2013-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2013-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-03-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-03-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-05-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-05-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here 
#### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key date 2012-03-17 2013-08-17 0 3 Rk0QAAEAAAAGAAAAAwAAAA0AAAADAAAABwAAAAsAAAAJAAAAEwAAAAkAAAADAAAABwAAAAMAAAAB +AAAABAAAAAUAAAA= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/hll.q.out b/ql/src/test/results/clientpositive/hll.q.out index b9357c3043..10bad4d818 100644 --- a/ql/src/test/results/clientpositive/hll.q.out +++ b/ql/src/test/results/clientpositive/hll.q.out @@ -1,3 +1,88 @@ +PREHOOK: query: create table n(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@n +POSTHOOK: query: create table n(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@n +PREHOOK: query: insert overwrite table n select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@n +POSTHOOK: query: insert overwrite table n select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@n +POSTHOOK: Lineage: n.key EXPRESSION [] +PREHOOK: query: explain analyze table n compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table n compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: n + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.n + +PREHOOK: query: analyze table n compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@n +#### A masked pattern was here #### +POSTHOOK: query: analyze table n compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@n +#### A masked pattern was here #### +PREHOOK: query: desc formatted n key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@n +POSTHOOK: query: desc formatted n key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@n +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 0 500 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: create table i(key int) PREHOOK: type: CREATETABLE PREHOOK: 
Output: database:default @@ -78,10 +163,31 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 309 SExM4LUCtQKN6yH2ofgFwu2cAvzblwLAqoIDwf6+AcDkmgT/i5kBxOPKA72MowGA5fAbg4LgAr2L +vQH/+x+A0gOA0fsBgefUBb+gkhPB+03A7aUCg+BdwKUTvbaQA4LIeL7O3QTCo6IDvbuRAsCRFoGb +YL/lO4LfuQK+4acBgLePCsHvrQHD43u87s4EwP0QwJJtyL23ArjLvQPCz7wG/5yDC7/N4AKCxMcO +vvCNA4HI4wP/6rEDgIIJwaZOv+cwgaevAf+GzQHA14ICwPz+BcDb+gKAhg+H7RS67okB/5HHAoOo +Nb2V2wWA6fAIwJSODsCT9gGAiY8Bg/xI/bq4CoDXkgHAyvYGgOduwJKLAYKbvgH+2bQBwNCWBoK7 +Gb+fmQO/6J8Bgb89/9fzAsCPywLAp/wHgbJeg/z8Ar25kQGA4P0Dv/OUA4CgoQaBpL4EwDuC+m69 +yDWA/BLApYELxMEv/I1LgYWwBMbTlAP56cMEgZssgeiCAb+kowHBvf0CgYHSAf3g0QaBi9sC/9yi +AoDbIYHllQnAhAGBqJkFvrBKgZmZDIKEogG9slWC7qgF/q5DwM30DoKHRcCN7wO+ir0DwLOtAcDy +8wKB4L0Dv/HEA4adpAOAqxr6kkyA14EIwbkUgIihCIGfcoCODr/z5wKAs/QBw7JvvLnQBMHmsgL/ +1UTAy5gCgbHaAf+UpgOAjO0HwcRQhOePAYChCruLvQaBtSj/osUBwoK1AYGn+Qm9kLcDgLSoAYCQ +2QeAv54FwoIavsJ2wYYL/9jbAoCTjgGBjDX/ztkBgPF8gtNC/r2PAoGgUcHDcb+LqAe/laoBwsOe +A8D6EsDQkAT+0tcGgIRzgIqQAYCT+gXB7wv/jvQMw4miBr3LvgTA0YYBgKCTCIHyxQHBtPcCvts+ +we3HAsD/9gG/zaEDgMiqA8H6iQHAniPCiIQB/bucAYDykQGCodED/o+VAsa89gO6pqAHwKvqAYGu +9QO/0bgPwLiEAcH7lwHA4v4FgMUrwe9k/v9ggaI5wbniAr7lOYP3tAH9vmXBxscCwPDuAYCkFoPc +6QaCoOUH/MSUAr/4gwmAw4wIv/rBAsCH2QGEl1n86qQBgOWcEoLOsgb+k74EhNjFAbyX2QHAi4MB +gJiCAYHyiwnAvYgC/5LkB4HnoQLA46QU/6+SBsGv6QHBut4Evo/iA8KzFL7b0AKAwJkJwZSRAb+g +4gHBux+B/58F/+D2Av/5tgKAmieA4MsBwrvkBMDIBb77GoCqnwjA3PkBgPOTCMD9e8P8tgK91poD +gIGeAcH3nQKAhqIEv6LdA4DK2AKClCm+mc4BxoVo+rCiAoDfoAKAtPoFwdCUAsHtpwH+j8QBwYWl 
+Ab+00gOAy9gMgfHAA7/hvwTAqCeCsUq/yUj/t9wCxYPOArvNrQTAq5ADwJrZCcKbX764IcHS1QKA +t+kLwtSlC/3wyweAl2bAhKEDwLXQCYDXhQXBpeICgcpm//3nBoDmGMG7lwH/y+YI//XaAYHTlQKA +4gPA7aoC/6mKCIDZpgLDoEQ= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: drop table i PREHOOK: type: DROPTABLE PREHOOK: Input: default@i @@ -121,10 +227,31 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key double 0.0 498.0 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key double 0.0 498.0 0 309 SExM4LUCtQLB60iBzkW/t98FgZmoAoClIL/zvgGA98wFwZxDgNEev/fQAoTVmQS8tMUCgIfzAsap +7wL7vdICwMCHAv/a2gGAj+MJwMSpBsHPLcGehw++nboDgY6bAb+Z3Q2CycMBvtO0A8DE9QOCh+sE +v4/3AoHde7++1QKB1ZUFvvHBAYCumwGAm4sBwOjEAoD4LsDB1APA0rEEwo6RAb665QaCysQBvua+ +BMGxF8C4qASAqtwLhZqTAbqmvAHChlWBuNsJ/fzNAoCjiwSA7f0BgMHaBIDtjQ3B9zLCwa0F/8ba +AYPTogK7xaMGgNT7BIOAQr7ZogH//r4Bwb88xcts+7xwv97zAYPPhAG+mJkBgeabAb6jFcDZmgmA +wWiB6NQG/6y2AcDn7QLB2OMBv7PADMK6K7+obMCrqwrBnPUE/qNQwaacDsCm9gG/wdUCwauuAf/I +yQGCgtkB/r+qAcAgwNvzD8C0GoTuwAS87xqAx7wJwc+QAYDolQL/164BwJ+VA8K4Hf/DjwG//ecB +wNTFEMCugwLA3CyBopEBv4fdAoOhSb6DjQPDjm79gI0B//ixBoCFA4HvpwGAx4sC/7KuD4DKGoPk +qwH9g8YBwI95wMCYAsD0lAHAhViGgt0F+7hQv4bMAsCKEICDmQHE06oD/J1FgJBfgOapA8PJlQf9 +itwEwM2GC4P8pgi997EDgP2aAsCzhgTAj5ADw5HBA77hdb/IL8LNxQT+zzOA4aQBgf+gBf/SWMDl +iAWF/r4BgPwD/KhS/5M+wIP+AcBkgPCBA4CxjAGAwgHEgFz/9vYLvZuZA8CHqgPDjJwBvddOwJua +BMCpUYKEvgS+pVaA9PoEg6osvZ1Gga/IAv/9wQLA94EDwOwigO1tgfadAcO4f7yA/ATAq94BwP2X +A4CahgLA64ECwOzUCoPq5wP9s5cBwNkJgISgAcCN7gLB0bMFgOyLCMDTlA7Bzm2/rYoBgbv8Av+G +esGE3gKAuSu+8YwFxK+9BICqLv70iAq/z1vB2oQDv790gZOXA8DxhQi+3r0Ewe2+AsGpfL7JtgGB 
+sdgHgt+IAb3riwKA/xqAx4YBwM6BBMD24QeE/sgEvM3RAsD/4QHA9KUBg9/PBr7xxgaB0aUD//aC +A8D0gxSB19wEvtOyCcDBmQGC9q4BvqHgCYDEbMGnaoHK2QT/j5kDv+w7gutQgP3zC/6+kgSAsh3B +xkC/ybsCgYq4Ab+iS8LN2wK/3dUEgMGICMHQ9wK+ucQCgJvyAofd9Ai5wzbC3LcFwrjwAf78jgq+ +xiPBgzO/0myEya8E/OKkAsHYPcHfqQP/ndwCwNH/BcOngAG8/d8Egd5S/+khgr+zEICIJ4bv0AH4 +isQCgN6lAsTolwO88EDA56UEwsSgDf7U4gHDpUa9570DweyNAb/LyQfA/PwGga7MA8Db7QGBpYEB +vqNhwNSNBMCL3AHBqzu/gGXAweUCgIqDAoCBdYLHyAbAaL/rgAWA9e4RgMwTv76yAoDZDcHd1wGA +tucFgd6SE8DhBr+JUQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: drop table i PREHOOK: type: DROPTABLE PREHOOK: Input: default@i @@ -164,10 +291,31 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key decimal(10,0) 0 498 0 309 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key decimal(10,0) 0 498 0 309 SExM4LUCtQLB60iBzkW/t98FgZmoAoClIL/zvgGA98wFwZxDgNEev/fQAoTVmQS8tMUCgIfzAsap +7wL7vdICwMCHAv/a2gGAj+MJwMSpBsHPLcGehw++nboDgY6bAb+Z3Q2CycMBvtO0A8DE9QOCh+sE +v4/3AoHde7++1QKB1ZUFvvHBAYCumwGAm4sBwOjEAoD4LsDB1APA0rEEwo6RAb665QaCysQBvua+ +BMGxF8C4qASAqtwLhZqTAbqmvAHChlWBuNsJ/fzNAoCjiwSA7f0BgMHaBIDtjQ3B9zLCwa0F/8ba +AYPTogK7xaMGgNT7BIOAQr7ZogH//r4Bwb88xcts+7xwv97zAYPPhAG+mJkBgeabAb6jFcDZmgmA +wWiB6NQG/6y2AcDn7QLB2OMBv7PADMK6K7+obMCrqwrBnPUE/qNQwaacDsCm9gG/wdUCwauuAf/I +yQGCgtkB/r+qAcAgwNvzD8C0GoTuwAS87xqAx7wJwc+QAYDolQL/164BwJ+VA8K4Hf/DjwG//ecB +wNTFEMCugwLA3CyBopEBv4fdAoOhSb6DjQPDjm79gI0B//ixBoCFA4HvpwGAx4sC/7KuD4DKGoPk +qwH9g8YBwI95wMCYAsD0lAHAhViGgt0F+7hQv4bMAsCKEICDmQHE06oD/J1FgJBfgOapA8PJlQf9 +itwEwM2GC4P8pgi997EDgP2aAsCzhgTAj5ADw5HBA77hdb/IL8LNxQT+zzOA4aQBgf+gBf/SWMDl 
+iAWF/r4BgPwD/KhS/5M+wIP+AcBkgPCBA4CxjAGAwgHEgFz/9vYLvZuZA8CHqgPDjJwBvddOwJua +BMCpUYKEvgS+pVaA9PoEg6osvZ1Gga/IAv/9wQLA94EDwOwigO1tgfadAcO4f7yA/ATAq94BwP2X +A4CahgLA64ECwOzUCoPq5wP9s5cBwNkJgISgAcCN7gLB0bMFgOyLCMDTlA7Bzm2/rYoBgbv8Av+G +esGE3gKAuSu+8YwFxK+9BICqLv70iAq/z1vB2oQDv790gZOXA8DxhQi+3r0Ewe2+AsGpfL7JtgGB +sdgHgt+IAb3riwKA/xqAx4YBwM6BBMD24QeE/sgEvM3RAsD/4QHA9KUBg9/PBr7xxgaB0aUD//aC +A8D0gxSB19wEvtOyCcDBmQGC9q4BvqHgCYDEbMGnaoHK2QT/j5kDv+w7gutQgP3zC/6+kgSAsh3B +xkC/ybsCgYq4Ab+iS8LN2wK/3dUEgMGICMHQ9wK+ucQCgJvyAofd9Ai5wzbC3LcFwrjwAf78jgq+ +xiPBgzO/0myEya8E/OKkAsHYPcHfqQP/ndwCwNH/BcOngAG8/d8Egd5S/+khgr+zEICIJ4bv0AH4 +isQCgN6lAsTolwO88EDA56UEwsSgDf7U4gHDpUa9570DweyNAb/LyQfA/PwGga7MA8Db7QGBpYEB +vqNhwNSNBMCL3AHBqzu/gGXAweUCgIqDAoCBdYLHyAbAaL/rgAWA9e4RgMwTv76yAoDZDcHd1wGA +tucFgd6SE8DhBr+JUQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: drop table i PREHOOK: type: DROPTABLE PREHOOK: Input: default@i @@ -233,7 +381,8 @@ PREHOOK: Input: default@i POSTHOOK: query: desc formatted i key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@i -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key date 2012-03-17 2013-08-17 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key date 2012-03-17 2013-08-17 0 4 SExM4AQEgZ3gM4Gdw13A3/qtA4L855QD + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out index f29f7b5d1a..ce524f9e1a 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out @@ -123,20 
+123,62 @@ PREHOOK: Input: default@a POSTHOOK: query: describe formatted a key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@a -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr 
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b key PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: from src insert overwrite table a select * insert into table b select * @@ -231,20 +273,62 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 0 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS 
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB 
+wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table b select NULL, NULL from src limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -261,20 +345,62 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 10 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count 
bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 10 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# 
col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 10 309 6.812 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 10 309 SExM4LUCtQLM7Sz2qpEHw751/KEegclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoDMqgOCtK0Dv6qT +A/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCdmAGBuSGAhBmF +r5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KXAv61xQGBgpcB +gIo6geaBAf+Q3wz/tkWBqNID/6ilBoDahwbA1fICwcKtBIHklAe+8sQFgLx/gIXzAcLDwAP+09EB +wP4GwrXYAoDuxwPBy5cCgLUF/dq4BcCkoAaAyfcEgJnRBYD8iAKA4vgGwL4SgIuEAsHnqgP/obsC +gO6YAoKYVb7VSsCR2ALAjJgBwfSUAoD3PMP/4QKAwxO84LoCwKDSAcDaLsH7kAK/u/YCgLf0AYH6 +xwGA1c8Bg7KxAf3Wcf+9pgKA4X+FseMDvZb8Ab7MtQaA0eUDwOUigKINwZUuv+HJBMCWDoLCNf7W +sgSAlO8DgO6lB8KatwT/xdQCv45EgcDADb/E5QTCnJUG/7OtBf+0mAHDmdEDv7edBv6CpgLAj/sC +wIV1gf2zBv/SywTC8dAC/r3JA4LUTP7BuQKAieADgfLcBr/ouAGAs2OAiKUGwJf2CoGQ0Qa/05oG +gNojgJTZBoCWkwTAt4EKx5rPA7mZqQKB5kCB9fcCgLTwB/6O8gTAxvYSwOaRAYDFgAPA+NsCgcm8 +BoC0jASB3esC/oDgBYGcvgXDteICwKoK/MZMgoXCAcCl4gO/m3q/3VKBs84BgY9kgMeyBL/zR8Cl +nALCsOIC/s2tAf/JsQLAmWCA+akCgrq2A//wlAu/26sEwobEBP/ejgG/spYMwOKnCoHqsQHA7oMC +v62oBoDghAaA8oECgPSkB4Dv+gHB+yHAqQ6/xRWBm9EBhPaFAby9jgO/1cgMgK9pwMuiA8C5/AaA +/f8DwsiOA4CiDYDemwS/zXDDxX6+x4UKwpyWBv7n8AH+la4HwvBSv67qAr+duAHA3K8EgvKLA/7+ +ygPA/nmAuQOFvt4Nu7+4A8PXG/2ZboGhF8K6+Ab+6IsJgd6AAf+aiwH/qVyEhB7+o/4F/rO0AYCM +fcC1qQOAuuYBgcbKAcDy5AH/wu0Bga6UAr/e1wPBnscCv9S+BYDKjwPB/JABgJeXBoCjAYHT8gL+ +tV6B6RO/9GGB1aUHxcCxAfqrngKAlIcDhLXJAr3vV//G5ATBiqQDgcGkAcDijQS+gsEJgc+FA4DP +KYKJiwH9hqwBwpJC/sHNAYHStAmB4IoJvsenBoHYR8CZiBTB2m/+mdUIgY2kAv+5P8CunwyA2QaC +sEb+toQCwp3JBYCqiAS+ruIEhq+wAfr6QcHKrw7/xMABxN/NB/ztO4K4tQG/94oBgJf4Ab+F2AHC +zawDwa2BAr6vkwX/u5QFwYfWAsDVgQKCgZwJ/ZYXwNztAoL3/AL+uKIBwdeeA8OYjwGBoIkIvMSm +A/+FF4PnPb6a7gKA1BfA4JMGwOb1Ab+WHg== + 
6.812 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table b(value) select key+100000 from src limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -291,20 +417,63 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 20 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key string 20 309 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J +gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB 
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwAPC/JcDvqvcAcG1ugODzpMBvZfmAoDWB//s +1wKHzaYCucW2BcL9uQT/+aMB/+WtAsKPN7+sdsTghAG7t6kEwNw+wKLUAsCDIIHcjwGAqpACgM36 +BcC//AOAtLEEgMbwAf+mwQiAqfgH + 2.812 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 10 309 8.0 8 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 10 319 SExM4L8CvwLM7SyB+xL1r/4Gw751wOABvMEcgclIgPbqBb/+4QWCxbMFvttNwJ79A8DuYoLguAL+ +63GCtK0Dv6qTA/+3OMCb5AGBvuwFgLXtAb/E0QOB1IQGwKwc/9PdBMCu9gSBw0y/1IYJwKeVDcCd +mAGBuSGAhBmFr5AB+tCzAcGDswGC8n292qYHwZPUAb+SVsDC1gGAuPwCgZG9Av/S+gXDuJAB//KX +Av61xQGBgpcBgIo6geaBAf6ArwyBkDD/tkWBqNID/6ilBoDahwbA1fICwcKtBL+I0QHC28MFvvLE +BYC8f4CF8wHCw8AD/tPRAcD+BsK12AKA7scDwcuXAoC1Bf3auAXApKAGgMn3BICZ0QWA/IgCgOL4 +BsC+EoCLhALB56oD/6G7AoDumAKCmFW+1UrAkdgCwIyYAcH0lAKA9zzD/+ECgMMTvOC6AsCg0gHA +2i7B+5ACv7v2AoC39AGB+scBgNXPAYOysQH91nH/vaYCgOF/hbHjA72W/AG+zLUGgNHlA8DlIoCi +DcGVLr/hyQTAlg6CwjX/5ooB/++nA4CU7wOA7qUHwpq3BP/F1AK/jkSBwMANv8TlBMKclQb/s60F +/7SYAcOZ0QO/t50G/oKmAsCP+wLAhXWB/bMGv/DkA8DiZsLx0AL+vckDgtRM/sG5AoCJ4AOB8twG +v+i4AYCzY4CIpQbAl/YKgZDRBr/TmgaA2iOAlNkGgJaTBMC3gQrHms8DuZmpAoHmQIH19wKAtPAH 
+/o7yBMDG9hLA5pEBgMWAA8D42wKBybwGgLSMBIHd6wL+gOAFgZy+BcO14gLAqgr8xkyChcIBwKXi +A7+ber/dUoGzzgGBj2SAx7IEv/NHwKWcAsKw4gL+za0B/8mxAsCZYID5qQKCurYD//CUC7/bqwTC +3USAqf8D/96OAb+ylgzA4qcKgeqxAcDugwK/ragGgOCEBoDygQKA9KQHgO/6AcH7IcCpDr/FFYGb +0QGE9oUBvL2OA7/VyAyAr2nAy6IDwLn8BoD9/wPCyI4DgKINgN6bBL/NcMPFfr7HhQrCnJYG/ufw +Af6VrgfC8FK/ruoCv524AcDcrwSC8osD/v7KA8D+eYC5A4W+3g27v7gDw9cb/ZlugaEXwrr4Bv7o +iwmB3oAB/5qLAf+pXISEHv6j/gX+s7QBgIx9wLWpA4C65gGBxsoBwPLkAf/C7QGBrpQCv97XA8Ge +xwK/1L4FgMqPA8H8kAGAl5cGgKMBgdPyAv61XoHpE7/0YYHVpQfFwLEB+queAoCUhwOEtckCve9X +/8bkBMGKpAOBwaQBwOKNBL6CwQmBz4UDgM8pgomLAf2GrAHCkkL+wc0BgdK0CYHgigm+x6cGgdhH +v/SpB4Gl3gzB2m/+mdUIgY2kAv+5P8DmngeAyIAFgNkGgrBG/raEAsKdyQWAqogEvq7iBIavsAH6 ++kHByq8O/8TAAcTfzQf87TuCuLUBv/eKAYCX+AG/hdgBws2sA8GtgQK+r5MF/7uUBcGH1gLA1YEC +goGcCf2WF8Dc7QKC9/wC/riiAcHXngPDmI8BgaCJCLzEpgP/hReD5z2+mu4CgNQXwOCTBsDm9QG/ +lh4= + 8.0 8 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: drop table src_multi2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table src_multi2 diff --git a/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out index fb833bccb2..040aa13b7b 100644 --- a/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out +++ b/ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out @@ -48,10 +48,10 @@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls 
distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + + left string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: insert into space values ("1", "2", "3") PREHOOK: type: QUERY PREHOOK: Output: default@space @@ -67,10 +67,11 @@ PREHOOK: Input: default@space POSTHOOK: query: desc formatted space ` left` POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@space -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - - left string 0 1 1.0 1 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + + left string 0 1 SExM4AEBxbi8+AQ= + 1.0 1 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\" left\":\"true\",\" middle \":\"true\",\"right \":\"true\"}} PREHOOK: query: select * from space PREHOOK: type: QUERY PREHOOK: Input: default@space diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index 5e647433f1..28e3ad70c0 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -80,36 +80,40 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len 
num_trues num_falses comment + +key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# col_name data_type comment + +key int from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute 
statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -134,36 +138,40 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.8 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# col_name data_type comment + +key int from deserializer PREHOOK: query: 
describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -192,54 +200,60 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 495 0 30 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 27 495 0 30 SExM4B4ewv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD8C5hRaHm6ND+b3hCYComCaA+tFngba1 +G7/T4wfAkocbguS2HL+06gTBtfI+/8iBAf/G+AWClaYVvr3WP8H6iQGB35Yz/v9gwYukJIPcgA3+ +6+9ZvuyzPYCwqTo= + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 
SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD +CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# col_name data_type comment + +key int from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -key int from deserializer +# col_name data_type comment + +key int from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -value string from 
deserializer +# col_name data_type comment + +value string from deserializer PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -276,36 +290,52 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 15 495 0 40 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 15 495 0 40 SExM4Cgowv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD4D4jA/AwfgGh5ujQ/m94QmAqJgmgPrR +Z4G2tRu/0+MHwJKHG4Lkthy/tOoEwfiHI77r2A7C0ZEN/8iBAf/G+AWClaYVvr3WP8H6iQGB35Yz +/v9gwYukJL+9zgrEnrIC/OqkAYDasSKCp5k2vuyzPYDrkw6AxZUsgK/7DYK2uAr/ivcC + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 40 6.825 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 40 SExM4Cgog+SgJr7iywPAp44YwK72BIC/6BaB3skFwLTeHYHwuwH/7/YQ/4KtDMCv7BbC0uQ3vqKP +DsDaLoHowwiErvwK+7OXDMDlIsC54ByB1egd/52dEcGEy1q//tAigKTuBsC/mmXB2LUDhN7rGvuS +1w+Bx7AXv5uoWMHXuTmB2L1lwNTNB77f1iKCuLUBv76cD4KaiBO9o6QHgdygE4DUFw== + 6.825 7 from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: 
type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 15 495 0 58 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 15 495 0 58 SExM4Do6wv+PD8PH8ii9i70BgIb0B4Cc4BPA7aUC/8KhD4GbYP/crA7AwfgGwMyEQMfOngP5veEJ +gJH1GIDAkAyA15IBg8+TPL25xAzCp9gR/smhDYG2tRu/0+MHwJKHG4Lkthy/tOoExJKoGf3l3wm+ +69gOwtGRDf/IgQH/xvgFwsSVEcDQkAS/qJM3/5TDCMH6iQHAwrIawZzkGP7/YMGLpCS/vc4KxJ6y +AvzqpAGA2rEigqeZNr7ssz2AgZ4BgOr1DIaz3wL6kbYpgK/7DcCa2QnCm1//ivcC/5fiIsC10AmC +5uYQvue2GQ== + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 58 6.883333333333334 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 58 SExM4Do6geTIH4KA2Aa+4ssDwKeOGMCu9gSAv+gWgd7JBcC03h2B8LsB/+/2EP+CrQzAr+wWwIKn +HoLQvRm+87oEgK/UCcDaLsH7kALA7LIGhK78CvuzlwzA5SLAueAcgdXoHf+dnRGA+ZUgwPH7M4C0 ++AWB5kC//tAigKTuBoHJvAaAm+4bv91SgP6cQsHYtQO/4s4XxfucA/uS1w+Bx7AXv5uoWMD2mxmB +4Z0gwMaBGv/Zzz7Ct+wM/raEAsKdyQW+39Yigri1Ab/3igGB6vwG/9yUB4KaiBO9o6QHwdeeA8CE +ghCA1Bc= + 6.883333333333334 7 from deserializer PREHOOK: query: drop table partcolstats PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstats @@ -356,9 +386,12 @@ PREHOOK: 
Input: default@partcolstatsnum POSTHOOK: query: describe formatted partcolstatsnum partition (tint=100, sint=1000, bint=1000000) value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatsnum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD +CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatsnum PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatsnum @@ -409,9 +442,12 @@ PREHOOK: Input: default@partcolstatsdec POSTHOOK: query: describe formatted partcolstatsdec partition (decpart='1000.0001') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatsdec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD +CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatsdec PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatsdec @@ -462,9 +498,12 @@ PREHOOK: Input: default@partcolstatschar POSTHOOK: query: describe formatted partcolstatschar partition (varpart='part1', charpart='aaa') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstatschar -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment - -value string 0 30 6.833333333333333 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 30 SExM4B4eg+SgJr7iywPAp44YwK72BIGdshzAtN4dgfC7Af/v9hD/gq0MwK/sFsLS5Df+/L0OgejD +CISu/Ar70popgPOFL8GEy1q/or8pwL+aZcHYtQP/8MIqgcewF7+bqFjCr/eeAcDUzQe+39Yigri1 +Ab++nA+CmogTvaOkBw== + 6.833333333333333 7 from deserializer PREHOOK: query: drop table partcolstatschar PREHOOK: type: DROPTABLE PREHOOK: Input: default@partcolstatschar diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out index 5db87d97cf..15d7f1c1ee 100644 --- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out @@ -72,10 +72,10 @@ PREHOOK: Input: default@testdeci2 POSTHOOK: query: describe formatted testdeci2 amount POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -amount decimal(10,3) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +amount decimal(10,3) from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index 6bc1970ad0..8e4dc4c1be 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -113,72 +113,80 @@ PREHOOK: Input: 
default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 0.75 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwMOJoQQ= + 0.75 2 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 6 3.0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 6 SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7 + 3.0 3 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid double 1.0 4.0 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 4.0 0 4 SExM4AQEwvmagwOC4fQQ/cXBowKCnueKAg== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') locid POSTHOOK: type: 
DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid double 1.0 5.0 0 5 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 5.0 0 5 SExM4AUFgoqWCcDvhPoCguH0EP3FwaMCgp7nigI= + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 10 2000 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 2000 0 4 SExM4AQEwtKH1wOJpIYp95+qNYHs8ZgB + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 10 910 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 910 0 4 SExM4AQEwavm2wOC18PyAYDUhBSCqe9l + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment - -zip int 43201 94087 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY= + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -zip int 43201 94087 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY= + from deserializer PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d @@ -414,72 +422,80 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 2 0.5 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 2 SExM4AICgaTbFYDJnvoC + 0.5 1 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - 
-state string 0 3 1.25 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDgaTbFcD8mOYCwJDuDA== + 1.25 4 from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid double 1.0 2.0 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 2.0 0 2 SExM4AICwvmagwP/pra0Ag== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid double 1.0 31.0 0 5 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 31.0 0 5 SExM4AUFgoqWCb/8tKEBg9TE6QH9xcGjAoKe54oC + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 1000 1010 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len 
max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 1010 0 2 SExM4AICwtKH1wOJpIYp + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 1000 2000 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 2000 0 3 SExM4AMDwtKH1wOJpIYp95+qNQ== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2000') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -zip int 94086 94087 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 94086 94087 0 2 SExM4AICgaPxmgOAs+SCAw== + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') zip PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2003') zip POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -zip int 43201 94087 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 3 SExM4AMDgaPxmgPB562MAr/LtnY= + from deserializer PREHOOK: query: 
explain extended select state,locid,cnt,zip from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d @@ -786,54 +802,60 @@ PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 2 0.5 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 2 SExM4AICgaTbFYDAoocH + 0.5 1 from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 3.0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 SExM4AMDwtmPPYHA90C/kJJj + 3.0 3 from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 2 3 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 2 3 0 2 SExM4AICga/rqgHA0vSOAw== + from deserializer PREHOOK: query: describe formatted 
loc_orc_2d partition(zip=94087, year='2002') locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 5 0 3 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 5 0 3 SExM4AMDxfO+SLy7rGLA9IJO + from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94086, year='2001') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 1000 2000 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 2000 0 2 SExM4AICy/aNgAT3n6o1 + from deserializer PREHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') cnt PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d partition(zip=94087, year='2002') cnt POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -cnt decimal(10,0) 10 100 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 100 0 2 SExM4AICw4KqzgWC/fN5 + from deserializer PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d PREHOOK: type: QUERY POSTHOOK: query: explain 
extended select state,locid,cnt,zip from loc_orc_2d diff --git a/ql/src/test/results/clientpositive/llap/llap_smb.q.out b/ql/src/test/results/clientpositive/llap/llap_smb.q.out index 87b33db805..6ece9a1982 100644 --- a/ql/src/test/results/clientpositive/llap/llap_smb.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_smb.q.out @@ -321,7 +321,7 @@ POSTHOOK: Input: default@orc_a@y=2001/q=8 POSTHOOK: Input: default@orc_a@y=2001/q=9 POSTHOOK: Input: default@orc_b #### A masked pattern was here #### -2000 5 52 +2001 8 52 2001 5 139630 PREHOOK: query: DROP TABLE orc_a PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out index 57aaf557b2..4b3d539a99 100644 --- a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out @@ -413,9 +413,10 @@ PREHOOK: Input: default@stats_null_part POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_null_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a double 1.0 1.0 1 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a double 1.0 1.0 1 1 SExM4AEBwaDRtwU= + from deserializer PREHOOK: query: drop table stats_null PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_null diff --git a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out index 2e9d88e343..023d51ccb8 100644 --- a/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/varchar_udf1.q.out @@ -406,7 +406,7 @@ from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} PREHOOK: query: select min(c2), min(c4) diff --git a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 9a164fe130..b8d19c5637 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2713,7 +2713,7 @@ from varchar_udf_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAAIAAAABAAAAAgAAAACAAAAAQAAAAQAAAABAAAAAgAAAAEAAAABAAAAAgAAAAgAAAAE\r\nAAAAAQAAAAgAAAA=\r\n"} PREHOOK: query: explain vectorization detail select 
min(c2), diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out index 87d47dae22..95eff9cc26 100644 --- a/ql/src/test/results/clientpositive/partial_column_stats.q.out +++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out @@ -69,7 +69,7 @@ PREHOOK: Input: default@t1 POSTHOOK: query: desc formatted t1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@t1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 0 0.0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 0 0.0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out index d459b36ff0..edaa745dbd 100644 --- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -302,48 +302,52 @@ PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De 
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 20 6.766666666666667 7 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 20 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + 6.766666666666667 7 from deserializer PREHOOK: query: describe formatted partcoltypenum tint PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum tint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -tint tinyint 110 110 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +tint tinyint 110 110 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} PREHOOK: query: describe formatted partcoltypenum sint PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum sint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sint smallint 22000 22000 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"sint\":\"true\"}} +# col_name data_type 
min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sint smallint 22000 22000 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"sint\":\"true\"}} PREHOOK: query: describe formatted partcoltypenum bint PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum bint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -bint bigint 330000000000 330000000000 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +bint bigint 330000000000 330000000000 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bint\":\"true\"}} PREHOOK: query: alter table partcoltypenum change key key decimal(10,0) PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: default@partcoltypenum @@ -458,10 +462,10 @@ PREHOOK: Input: default@partcoltypenum POSTHOOK: query: describe formatted partcoltypenum tint POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcoltypenum -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -tint decimal(3,0) 110 110 0 1 -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +tint decimal(3,0) 110 110 0 1 +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"tint\":\"true\"}} PREHOOK: query: show partitions partcoltypenum partition (tint=110BD, sint=22000S, bint=330000000000L) PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@partcoltypenum diff --git a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out index 4bddd3bef8..124a4b489a 100644 
--- a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out +++ b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out @@ -22,7 +22,7 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{0}{0}{0}{1}{1}{1}{0}{0}{0}{0}{0}{1}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{0}{0}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}{4}{2}{0}"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{1}{3}{2}{3}{5}{2}{0}{1}{0}{1}{1}{1}{1}{0}{1}"} +{"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAABAAAAAQAAAAEAAAACAAAAAgAAAAIAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAEAAAABAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAQ\r\nAAAABAAAAAEAAAA=\r\n"} {"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAEAAAACAAAACAAAAAQAAAAIAAAAIAAAAAQAAAABAAAAAgAAAAEAAAACAAAAAgAAAAIAAAAC\r\nAAAAAQAAAAIAAAA=\r\n"} PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( @@ -111,7 +111,7 @@ select POSTHOOK: type: 
QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"} +{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16) from ( @@ -217,4 +217,4 @@ select POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 
-{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"} +{"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAQAAAACAAAAAQAAAAQAAAABAAAAAgAAAAIAAAACAAAAAQAAAAEAAAACAAAAAgAAAAEAAAAE\r\nAAAAAgAAAAEAAAA=\r\n"} {"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAEAAAAAQAAAAgAAABAAAAACAAAAAEAAAACAAAAAgAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} {"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"Rk0QAAIAAAAQAAAAAQAAAAEAAAAQAAAACAAAAAEAAAACAAAAAQAAAAEAAAABAAAAAQAAAAEAAAAB\r\nAAAAAgAAAAQAAAA=\r\n"} {"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"Rk0QAAQAAAABAAAABAAAAAQAAAABAAAAAQAAAAQAAAABAAAAAQAAAAEAAAABAAAAAQAAAAIAAAAB\r\nAAAAAQAAAAEAAAA=\r\n"} diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out index 19546c38bc..dac82b8dfa 100644 --- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out +++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out @@ -162,18 +162,20 @@ PREHOOK: Input: 
default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 9 0 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 9 0 6 SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE= + from deserializer PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 6 5.0 5 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 6 SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs= + 5.0 5 from deserializer PREHOOK: query: ALTER TABLE ex_table PARTITION (part='part1') RENAME TO PARTITION (part='part2') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: default@ex_table @@ -310,15 +312,17 @@ PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key int 0 9 0 6 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +key int 0 9 0 6 SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE= + from deserializer PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE 
FORMATTED ex_table PARTITION (part='part2') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 6 5.0 5 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value string 0 6 SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs= + 5.0 5 from deserializer diff --git a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out index 16b3a38c46..35ab95cad5 100644 --- a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out +++ b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out @@ -57,30 +57,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - 
-col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -96,30 +99,33 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count 
bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} 
PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default @@ -203,30 +209,33 @@ PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable1 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 POSTHOOK: query: describe formatted statsdb1.testtable1 col3 POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: statsdb1@testtable1 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: alter table statsdb1.testtable1 rename to statsdb2.testtable2 PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: statsdb1@testtable1 @@ -242,30 +251,33 @@ PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col2 string 0 10 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + 6.7 7 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb2.testtable2 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb2@testtable2 POSTHOOK: query: describe formatted statsdb2.testtable2 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb2@testtable2 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col3 string 0 1 4.0 4 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +col3 string 0 1 SExM4AEBgeL8+wM= + 4.0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default diff --git a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out index b73b5f5679..eafec9aa67 100644 --- a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out @@ -36,10 +36,10 @@ PREHOOK: Input: default@dec POSTHOOK: query: DESC FORMATTED `dec` value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dec -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value decimal(8,4) -12.25 234.79 0 10 
from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +value decimal(8,4) -12.25 234.79 0 10 SExM4AoKxdOOGP2An6UDv92lC4HV6VD/sbUNg9u1Bb210FHA981AwdjTnAGB//Ui from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}} PREHOOK: query: DROP TABLE IF EXISTS avro_dec PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS avro_dec diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out index 359eea3acb..7cb2d64a8f 100644 --- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out @@ -389,9 +389,9 @@ PREHOOK: Input: default@stats_null_part POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_null_part -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a double 1.0 1.0 1 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a double 1.0 1.0 1 1 SExM4AEBwaDRtwU= from deserializer PREHOOK: query: drop table stats_null PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_null diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index 88c2114356..6a75cd2bdc 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -377,9 +377,10 @@ PREHOOK: Input: default@stats_null_part POSTHOOK: query: describe formatted stats_null_part partition(dt = 1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_null_part -# col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment - -a double 1.0 1.0 1 1 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a double 1.0 1.0 1 1 SExM4AEBwaDRtwU= + from deserializer PREHOOK: query: drop table stats_null PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_null diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index ad92058cab..6c0576b12d 100644 --- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -55,9 +55,9 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer PREHOOK: query: explain analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -242,27 +242,36 @@ PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string 0 55 
SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB +wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + 12.763636363636364 13 from deserializer PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -avgTimeOnSite int 1 9 0 9 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +avgTimeOnSite int 1 9 0 9 SExM4AkJwZn6L4TaxBi8u6xigOL3TMCSiwHBrsJOwKr8Df+Es+QBgPyEtwI= + from deserializer PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -adRevenue float 13.099044799804688 492.98870849609375 0 55 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +adRevenue float 13.099044799804688 492.98870849609375 0 55 SExM4Dc3gb3HC8Lswyq+hbYCgOOKIoHH7AKA4u4D/73OA4DH6QnA8ZIbhaSXBv/e/xf9jo4JgJ2b +Av/htwrBsJ4ZwZugD//O6wbB6qcFvoW+E4DW+wyA8/gCgK6GD4HIuhD/pccFgIXqAsCl/wyAv+QK +wNq4HYLrrB++s5sIgOWzPoSMlA/83cMVwdy8PYCjhwL/3LIWxOm7JPye8w/A/O0VwNjgBIDOiRHA +86ELwJ/+AYCr1QzA7YUQgO2gEcDZEIDK6EPAo+kOg4HxCv3ZkSmBrLlRgd6IA/6lwROAlYAL + from 
deserializer PREHOOK: query: CREATE TEMPORARY TABLE empty_tab( a int, b double, @@ -289,10 +298,10 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a int from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: explain analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY @@ -358,20 +367,20 @@ PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a int 0 0 0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +a int 0 0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: desc formatted empty_tab b PREHOOK: type: DESCTABLE PREHOOK: Input: default@empty_tab POSTHOOK: query: desc formatted empty_tab b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b double 0.0 0.0 
0 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +b double 0.0 0.0 0 0 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\",\"e\":\"true\"}} PREHOOK: query: CREATE DATABASE test PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:test @@ -447,27 +456,31 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none -# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment - -sourceIP string 0 55 12.763636363636364 13 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sourceIP string 0 55 SExM4Dc3wbjRG8DNgg/A1YwYwNiYDsCVzwXBzLkCgOc1v9LCJcG2rAK/65wVwYL2Br/zjxnBze8M +wMiBIMDE/DG/n50HwcqyAoCXmQi/0KAPgMSxIIGKsRi/oqUSwKD9F4DuAYH72Rn/48sWgLP+EMGB +wgS/28MZwPT9KsGGrwuAluEFv+ngDYGoqgT/09AOgLCEBYHVvg6/l78rgevVFMD77Q+AkZ0I/7Wz +AoOimAj+mLMJwdPMCL7P1BvC9sIM/+puv4W+A4KWxlP+nsMpwYbnCf+4qyHCnJgXgPenMA== + 12.763636363636364 13 from deserializer PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none @@ -489,15 +502,23 @@ PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sKeyword string 0 54 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + 7.872727272727273 19 from deserializer PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none POSTHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none -# col_name data_type min max num_nulls distinct_count avg_col_len 
max_col_len num_trues num_falses comment - -sKeyword string 0 54 7.872727272727273 19 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +sKeyword string 0 54 SExM4DY2gavGA8LX6ha/63i/4NIZgP/NA8Hmxi7D8X68yu4JwKnuAYKBvg6+/cIOwO2NH4D7xCeA ++Q7Al64DwLnLC8Gi2Rj/p8wIwrK2LoGP3w2B/p4EvoGCEsDMiCi+xtAqwJ/3BITmuRb8sqcLgIPr +aYC3txTA4/MHgN7cBICm/g3Bx13AiJMOwI79Bb+wjQLAm7oEgJnWH4LUzgL/4PYagKCOBoLi+yC9 +x84VgK/tGcKxyAL+6NULwcKYE8KzmD/F7/IDuK6yFoT3wgG9m5UJv9WvIcDC5DA= + 7.872727272727273 19 from deserializer diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 626e1fd4d0..1764164a91 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -296,14 +296,14 @@ Stage-3 Reducer 2 File Output Operator [FS_8] table:{"name:":"default.acid_uami"} - Select Operator [SEL_4] (rows=8/2 width=302) + Select Operator [SEL_4] (rows=4/2 width=302) Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_3] PartitionCols:UDFToInteger(_col0) - Select Operator [SEL_2] (rows=8/2 width=302) + Select Operator [SEL_2] (rows=4/2 width=302) Output:["_col0","_col1","_col3"] - Filter Operator [FIL_9] (rows=8/2 width=226) + Filter Operator [FIL_9] (rows=4/2 width=226) predicate:((de = 109.23) or (de = 119.23)) TableScan [TS_0] (rows=8/4 width=226) default@acid_uami,acid_uami, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["i","de","vc"] diff --git a/ql/src/test/results/clientpositive/tunable_ndv.q.out b/ql/src/test/results/clientpositive/tunable_ndv.q.out index 437beafc0d..dd7e6a2402 100644 --- a/ql/src/test/results/clientpositive/tunable_ndv.q.out +++ b/ql/src/test/results/clientpositive/tunable_ndv.q.out @@ -73,48 +73,53 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d partition(year=2000) 
locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 2 0 2 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 2 0 2 SExM4AICxfO+SPyNofED + from deserializer PREHOOK: query: describe formatted loc_orc_1d partition(year=2001) locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d partition(year=2001) locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 4 0 4 SExM4AQExfO+SLy7rGKA4vdMwPD8wQI= + from deserializer PREHOOK: query: describe formatted loc_orc_1d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 4 0 4 SExM4AICxfO+SPyNofED + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_1d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 4 0 4 SExM4AICxfO+SPyNofED + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_1d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 4 0 4 SExM4AICxfO+SPyNofED + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: create table if not exists loc_orc_2d ( state string, locid int @@ -194,27 +199,30 @@ PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 3 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 4 0 4 SExM4AEBwYHguQQ= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_2d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min 
max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 4 0 4 SExM4AEBwYHguQQ= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} PREHOOK: query: describe formatted loc_orc_2d locid PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_2d POSTHOOK: query: describe formatted loc_orc_2d locid POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_2d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -locid int 1 4 0 4 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}} +# col_name data_type min max num_nulls distinct_count bitVector avg_col_len max_col_len num_trues num_falses comment + +locid int 1 4 0 4 SExM4AEBwYHguQQ= + from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"locid\":\"true\"}}