diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 102e6c6a916..b592d03bc77 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -18,9 +18,6 @@ package org.apache.hadoop.hive.conf; -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.FileUtils; @@ -43,7 +40,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.security.auth.login.LoginException; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; + import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; @@ -70,6 +70,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.security.auth.login.LoginException; + /** * Hive Configuration. */ @@ -2474,6 +2476,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "UDTFs change the number of rows of the output. A common UDTF is the explode() method that creates\n" + "multiple rows for each element in the input array. 
This factor is applied to the number of\n" + "output rows and output size."), + HIVE_STATS_USE_BITVECTORS("hive.stats.use.bitvectors", false, + "Enables the use of bitvectors for estimating selectivity."), // Concurrency HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java new file mode 100644 index 00000000000..6531240f14a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/HiveMurmur3Adapter.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hive.common.util.Murmur3; + +/** + * Maps Hive values of primitive types to their Murmur3 hash values. + */ +public class HiveMurmur3Adapter { + + private PrimitiveCategory type; + private PrimitiveObjectInspector inputOI; + + public HiveMurmur3Adapter(PrimitiveObjectInspector oi) throws HiveException { + this.inputOI = oi; + type = oi.getTypeInfo().getPrimitiveCategory(); + } + + private final ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES); + + public long murmur3(Object objVal) throws HiveException { + Object p = objVal; + switch (type) { + case BYTE: + case SHORT: + case INT: + case LONG: + case TIMESTAMP: { + long val = PrimitiveObjectInspectorUtils.getLong(objVal, inputOI); + LONG_BUFFER.putLong(0, val); + return Murmur3.hash64(LONG_BUFFER.array()); + } + case FLOAT: + case DOUBLE: { + double val = PrimitiveObjectInspectorUtils.getDouble(objVal, inputOI); + LONG_BUFFER.putDouble(0, val); + return Murmur3.hash64(LONG_BUFFER.array()); + } + case STRING: + case CHAR: + case VARCHAR: { + String val = PrimitiveObjectInspectorUtils.getString(objVal, inputOI); + return Murmur3.hash64(val.getBytes()); + } + case DECIMAL: { + HiveDecimal decimal = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI); + LONG_BUFFER.putDouble(0, decimal.doubleValue()); + return Murmur3.hash64(LONG_BUFFER.array()); + } + case DATE: + DateWritable v = new DateWritable((DateWritable) 
inputOI.getPrimitiveWritableObject(p)); + int days = v.getDays(); + LONG_BUFFER.putLong(0, days); + return Murmur3.hash64(LONG_BUFFER.array()); + case BOOLEAN: + case BINARY: + default: + throw new HiveException("type: " + type + " is not supported"); + } + } + + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 32fba6c8ff8..e635a6c3c5f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -31,6 +31,10 @@ import java.util.Set; import java.util.Stack; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.HyperLogLogBuilder; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; @@ -103,7 +107,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -113,7 +119,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import scala.math.Numeric; public class StatsRulesProcFactory { @@ -532,10 +537,11 
@@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, long currNumRow } for (int i = 0; i < columnStats.size(); i++) { long dvs = columnStats.get(i) == null ? 0 : columnStats.get(i).getCountDistint(); + long intersectionSize = estimateIntersectionSize(aspCtx.getConf(), columnStats.get(i), values.get(i)); // (num of distinct vals for col in IN clause / num of distinct vals for col ) double columnFactor = dvs == 0 ? 0.5d : (1.0d / dvs); if (!multiColumn) { - columnFactor *=values.get(0).size(); + columnFactor *= intersectionSize; } // max can be 1, even when ndv is larger in IN clause than in column stats factor *= columnFactor > 1d ? 1d : columnFactor; @@ -550,6 +556,51 @@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, long currNumRow return Math.round(numRows * factor * inFactor); } + private long estimateIntersectionSize(HiveConf conf, ColStatistics colStatistics, + Set values) { + try { + boolean useBitVectors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_USE_BITVECTORS); + if (!useBitVectors) { + return values.size(); + } + if (colStatistics == null) { + return values.size(); + } + byte[] bitVector = colStatistics.getBitVectors(); + if (bitVector == null) { + return values.size(); + } + NumDistinctValueEstimator sketch = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(bitVector); + if (!(sketch instanceof HyperLogLog)) { + return values.size(); + } + HyperLogLog hllCol = (HyperLogLog) sketch; + HyperLogLog hllVals = new HyperLogLogBuilder().build(); + + for (ExprNodeDescEqualityWrapper b : values) { + ObjectInspector oi = b.getExprNodeDesc().getWritableObjectInspector(); + HiveMurmur3Adapter hma = new HiveMurmur3Adapter((PrimitiveObjectInspector) oi); + ExprNodeConstantDesc c = (ExprNodeConstantDesc) b.getExprNodeDesc(); + + hllVals.add(hma.murmur3(c.getWritableObjectInspector().getWritableConstantValue())); + } + + long cntA = hllCol.count(); + long cntB = hllVals.count(); + hllCol.merge(hllVals); + long 
cntU = hllCol.count(); + + long cntI = cntA + cntB - cntU; + if (cntI < 0) { + return 0; + } + return cntI; + } catch (HiveException e) { + throw new RuntimeException("Failed to estimate IN clause selectivity using bitvectors", e); + } + + } + static class RangeOps { private String colType; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java index a31f965a5fb..7fc17427122 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -30,6 +30,7 @@ private Range range; private boolean isPrimaryKey; private boolean isEstimated; + private byte[] bitVectors; public ColStatistics(String colName, String colType) { this.setColumnName(colName); @@ -196,4 +197,13 @@ public String toString() { return sb.toString(); } } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors; + } + + public byte[] getBitVectors() { + return bitVectors; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java index 7b8c5d12332..99571540170 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java @@ -118,7 +118,7 @@ public String getName() { public final static class ExprNodeDescEqualityWrapper { private final ExprNodeDesc exprNodeDesc; // beware of any implementation whose hashcode is mutable by reference - // inserting into a Map and then changing the hashcode can make it + // inserting into a Map and then changing the hashcode can make it // disappear out of the Map during lookups private final int hashcode; diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index b7adc485a70..79cac42b218 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -843,27 +843,32 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab cs.setNumNulls(csd.getLongStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive1()); cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); + cs.setBitVectors(csd.getLongStats().getBitVectors()); } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) { cs.setCountDistint(csd.getLongStats().getNumDVs()); cs.setNumNulls(csd.getLongStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive2()); cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); + cs.setBitVectors(csd.getLongStats().getBitVectors()); } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) { cs.setCountDistint(csd.getDoubleStats().getNumDVs()); cs.setNumNulls(csd.getDoubleStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive1()); cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); + cs.setBitVectors(csd.getDoubleStats().getBitVectors()); } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) { cs.setCountDistint(csd.getDoubleStats().getNumDVs()); cs.setNumNulls(csd.getDoubleStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive2()); cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); + cs.setBitVectors(csd.getDoubleStats().getBitVectors()); } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { cs.setCountDistint(csd.getStringStats().getNumDVs()); cs.setNumNulls(csd.getStringStats().getNumNulls()); cs.setAvgColLen(csd.getStringStats().getAvgColLen()); + cs.setBitVectors(csd.getStringStats().getBitVectors()); } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) { if 
(csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) { cs.setCountDistint(2); @@ -897,6 +902,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab cs.setRange(minVal, maxVal); } } + cs.setBitVectors(csd.getDecimalStats().getBitVectors()); } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); cs.setNumNulls(csd.getDateStats().getNumNulls()); @@ -905,6 +911,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab Long highVal = (csd.getDateStats().getHighValue() != null) ? csd.getDateStats().getHighValue() .getDaysSinceEpoch() : null; cs.setRange(lowVal, highVal); + cs.setBitVectors(csd.getDateStats().getBitVectors()); } else { // Columns statistics for complex datatypes are not supported yet return null; diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java index e5233ced3fd..90b268e9311 100644 --- ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java +++ ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestStatEstimations.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hive.ql.plan.mapping; -import static org.junit.Assert.assertEquals; import java.util.List; + import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.DriverFactory; @@ -28,13 +28,17 @@ import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hive.testutils.HiveTestEnvSetup; + import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestRule; +import static org.junit.Assert.assertEquals; + public class TestStatEstimations { @ClassRule @@ 
-50,8 +54,8 @@ public static void beforeClass() throws Exception { String cmds[] = { // @formatter:off "create table t2(a integer, b string) STORED AS ORC", - "insert into t2 values(1, 'AAA'),(2, 'AAA'),(3, 'AAA'),(4, 'AAA'),(5, 'AAA')," + - "(6, 'BBB'),(7, 'BBB'),(8, 'BBB'),(9, 'BBB'),(10, 'BBB')", + "insert into t2 values(1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5')," + + "(6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(10, 'B5')", "analyze table t2 compute statistics for columns" // @formatter:on }; @@ -83,6 +87,40 @@ private PlanMapper getMapperForQuery(IDriver driver, String query) { } @Test + public void testFilterStringIn() throws ParseException { + IDriver driver = createDriver(); + String query = "explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a"; + + PlanMapper pm = getMapperForQuery(driver, query); + List fos = pm.getAll(FilterOperator.class); + // the same operator is present 2 times + fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed()); + assertEquals(1, fos.size()); + FilterOperator fop = fos.get(0); + + // any estimation near 1 is ok...currently 1 + assertEquals(1, fop.getStatistics().getNumRows()); + } + + // FIXME: right now not in is transformed into AND( NE(...) , NE(...) 
) + @Ignore + @Test + public void testFilterStringNotIn() throws ParseException { + IDriver driver = createDriver(); + String query = "explain select a from t2 where b NOT IN ('XXX', 'UUU') order by a"; + + PlanMapper pm = getMapperForQuery(driver, query); + List fos = pm.getAll(FilterOperator.class); + // the same operator is present 2 times + fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed()); + assertEquals(1, fos.size()); + FilterOperator fop = fos.get(0); + + // any estimation near 10 is ok...currently 10 + assertEquals(10, fop.getStatistics().getNumRows()); + } + + @Test public void testFilterIntIn() throws ParseException { IDriver driver = createDriver(); String query = "explain select a from t2 where a IN (-1,0,1,2,10,20,30,40) order by a"; @@ -102,6 +140,7 @@ public void testFilterIntIn() throws ParseException { private static IDriver createDriver() { HiveConf conf = env_setup.getTestCtx().hiveConf; + conf.setBoolVar(ConfVars.HIVE_STATS_USE_BITVECTORS, true); conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false); conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); diff --git ql/src/test/queries/clientpositive/in_bitvector_filter.q ql/src/test/queries/clientpositive/in_bitvector_filter.q new file mode 100644 index 00000000000..6f8e1a45cdd --- /dev/null +++ ql/src/test/queries/clientpositive/in_bitvector_filter.q @@ -0,0 +1,22 @@ +create table t2(a integer, b string) STORED AS ORC; +insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'), + (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5'); +analyze table t2 compute statistics for columns; + +set hive.stats.fetch.column.stats=true; + +set hive.stats.use.bitvectors=false; + +-- 1,2,10,11,12,13,14,15,20 => 9 +explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a; + +set hive.stats.use.bitvectors=true; + +-- 1,2,20 => 3 +explain select a from t2 
where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a; + +-- A3 only => 1 +explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a; + +-- A3,B1,B5 => 3 +explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a; diff --git ql/src/test/results/clientpositive/in_bitvector_filter.q.out ql/src/test/results/clientpositive/in_bitvector_filter.q.out new file mode 100644 index 00000000000..38c4f449e53 --- /dev/null +++ ql/src/test/results/clientpositive/in_bitvector_filter.q.out @@ -0,0 +1,234 @@ +PREHOOK: query: create table t2(a integer, b string) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2(a integer, b string) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'), + (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5'), + (6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(20, 'B5') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a SCRIPT [] +POSTHOOK: Lineage: t2.b SCRIPT [] +PREHOOK: query: analyze table t2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table t2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: explain 
select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean) + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: explain select a from t2 where a IN (-1,0,1,2,10,11,12,13,14,15,20,30,40) order by a +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (a) IN (-1, 0, 1, 2, 10, 11, 12, 13, 14, 15, 20, 30, 40) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: explain select a from t2 where b IN ('A3', 'ABC', 'AXZ') order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (b) IN ('A3', 'ABC', 'AXZ') (type: boolean) + Statistics: Num rows: 10 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (b) IN ('A3', 'ABC', 'AXZ') (type: boolean) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: explain select a from t2 where b IN ('A3', 'B1', 'B5') order by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (b) IN ('A3', 'B1', 'B5') (type: boolean) + Statistics: Num rows: 10 Data size: 900 Basic stats: COMPLETE Column 
stats: COMPLETE + Filter Operator + predicate: (b) IN ('A3', 'B1', 'B5') (type: boolean) + Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +