diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 76e460ed7a..4ca7e2b9c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -467,6 +467,8 @@ system.registerGenericUDAF("context_ngrams", new GenericUDAFContextNGrams()); system.registerGenericUDAF("compute_stats", new GenericUDAFComputeStats()); + system.registerGenericUDF("ndv_compute_bit_vector", GenericUDFNDVComputeBitVector.class); + system.registerGenericUDAF("compute_bit_vector", new GenericUDAFComputeBitVector()); system.registerGenericUDAF("bloom_filter", new GenericUDAFBloomFilter()); system.registerGenericUDAF("approx_distinct", new GenericUDAFApproximateDistinct()); system.registerUDAF("percentile", UDAFPercentile.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 2787b47b2e..40ab36bc35 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hive.ql.metadata.HiveUtils.unparseIdentifier; +import com.google.common.base.Preconditions; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -28,7 +29,6 @@ import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; @@ -40,7 +40,11 @@ import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsField; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -74,17 +78,6 @@ public ColumnStatsSemanticAnalyzer(QueryState queryState) throws SemanticExcepti super(queryState); } - public static String getQuote(HiveConf conf) { - String qIdSupport = conf.getVar(ConfVars.HIVE_QUOTEDID_SUPPORT); - if ("column".equals(qIdSupport)) { - return "`"; - } else if ("standard".equals(qIdSupport)) { - return "\""; - } else { - return ""; - } - } - private boolean shouldRewrite(ASTNode tree) { boolean rwt = false; if (tree.getChildCount() > 1) { @@ -246,26 +239,15 @@ public static String genRewrittenQuery(Table tbl, List colNames, HiveCon columnNamesBuilder.append(" , "); columnDummyValuesBuilder.append(" , "); } - String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase(); - rewrittenQueryBuilder.append("compute_stats("); + final String columnName = unparseIdentifier(colNames.get(i), conf); - rewrittenQueryBuilder.append(columnName); - rewrittenQueryBuilder.append(", '" + func + "'"); - if ("fm".equals(func)) { - int numBitVectors = 0; 
- try { - numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); - } catch (Exception e) { - throw new SemanticException(e.getMessage()); - } - rewrittenQueryBuilder.append(", " + numBitVectors); - } - rewrittenQueryBuilder.append(')'); + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(tbl.getCols().get(i).getType()); + genComputeStats(rewrittenQueryBuilder, conf, i, columnName, typeInfo); columnNamesBuilder.append(unparseIdentifier(columnName, conf)); columnDummyValuesBuilder.append( - "cast(null as " + TypeInfoUtils.getTypeInfoFromTypeString(tbl.getCols().get(i).getType()).toString() + ")"); + "cast(null as " + typeInfo.toString() + ")"); } if (isPartitionStats) { @@ -304,15 +286,199 @@ public static String genRewrittenQuery(Table tbl, List colNames, HiveCon } String rewrittenQuery = rewrittenQueryBuilder.toString(); - rewrittenQuery = new VariableSubstitution(new HiveVariableSource() { - @Override - public Map getHiveVariable() { - return SessionState.get().getHiveVariables(); - } - }).substitute(conf, rewrittenQuery); + rewrittenQuery = new VariableSubstitution( + () -> SessionState.get().getHiveVariables()).substitute(conf, rewrittenQuery); return rewrittenQuery; } + private static void genComputeStats(StringBuilder rewrittenQueryBuilder, HiveConf conf, + int pos, String columnName, TypeInfo typeInfo) throws SemanticException { + Preconditions.checkArgument(typeInfo.getCategory() == Category.PRIMITIVE); + ColumnStatsType columnStatsType = + ColumnStatsType.getColumnStatsType((PrimitiveTypeInfo) typeInfo); + // The first column is always the type + // The rest of columns will depend on the type itself + int size = columnStatsType.getColumnStats().size() - 1; + for (int i = 0; i < size; i++) { + ColumnStatsField columnStatsField = columnStatsType.getColumnStats().get(i); + appendStatsField(rewrittenQueryBuilder, conf, columnStatsField, columnStatsType, + columnName, pos); + rewrittenQueryBuilder.append(", "); + } + ColumnStatsField columnStatsField = columnStatsType.getColumnStats().get(size); + appendStatsField(rewrittenQueryBuilder, conf, columnStatsField, columnStatsType, + columnName, pos); + } + + private static void appendStatsField(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsField columnStatsField, ColumnStatsType columnStatsType, + String columnName, int pos) throws SemanticException { + switch (columnStatsField) { + case COLUMN_TYPE: + appendColumnType(rewrittenQueryBuilder, conf, columnStatsType, pos); + break; + case COUNT_TRUES: + appendCountTrues(rewrittenQueryBuilder, conf, columnName, pos); + break; + case COUNT_FALSES: + appendCountFalses(rewrittenQueryBuilder, conf, columnName, pos); + break; + case COUNT_NULLS: + appendCountNulls(rewrittenQueryBuilder, conf, columnName, pos); + break; + case MIN: + appendMin(rewrittenQueryBuilder, conf, columnStatsType, columnName, pos); + break; + case MAX: + appendMax(rewrittenQueryBuilder, conf, columnStatsType, columnName, pos); + break; + case NDV: + appendNDV(rewrittenQueryBuilder, conf, columnName, pos); + break; + case BITVECTOR: + appendBitVector(rewrittenQueryBuilder, conf, columnName, pos); + break; + case MAX_LENGTH: + appendMaxLength(rewrittenQueryBuilder, conf, columnName, pos); + break; + case AVG_LENGTH: + appendAvgLength(rewrittenQueryBuilder, conf, columnName, pos); + break; + default: + throw new SemanticException("Not supported field " + columnStatsField); + } + } + + private static void appendColumnType(StringBuilder rewrittenQueryBuilder, HiveConf conf, + 
ColumnStatsType columnStatsType, int pos) { + rewrittenQueryBuilder.append("'") + .append(columnStatsType.toString()) + .append("' AS ") + .append(unparseIdentifier(ColumnStatsField.COLUMN_TYPE.getFieldName() + pos, conf)); + } + + private static void appendMin(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, String columnName, int pos) { + switch (columnStatsType) { + case LONG: + rewrittenQueryBuilder.append("CAST(min(") + .append(columnName) + .append(") AS bigint) AS "); + break; + case DOUBLE: + rewrittenQueryBuilder.append("CAST(min(") + .append(columnName) + .append(") AS double) AS "); + break; + default: + rewrittenQueryBuilder.append("min(") + .append(columnName) + .append(") AS "); + break; + } + rewrittenQueryBuilder.append( + unparseIdentifier(ColumnStatsField.MIN.getFieldName() + pos, conf)); + } + + private static void appendMax(StringBuilder rewrittenQueryBuilder, HiveConf conf, + ColumnStatsType columnStatsType, String columnName, int pos) { + switch (columnStatsType) { + case LONG: + rewrittenQueryBuilder.append("CAST(max(") + .append(columnName) + .append(") AS bigint) AS "); + break; + case DOUBLE: + rewrittenQueryBuilder.append("CAST(max(") + .append(columnName) + .append(") AS double) AS "); + break; + default: + rewrittenQueryBuilder.append("max(") + .append(columnName) + .append(") AS "); + break; + } + rewrittenQueryBuilder.append( + unparseIdentifier(ColumnStatsField.MAX.getFieldName() + pos, conf)); + } + + private static void appendMaxLength(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(COALESCE(max(LENGTH(") + .append(columnName) + .append(")), 0) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.MAX_LENGTH.getFieldName() + pos, conf)); + } + + private static void appendAvgLength(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(COALESCE(avg(LENGTH(") + .append(columnName) + .append(")), 0) AS double) AS ") + .append(unparseIdentifier(ColumnStatsField.AVG_LENGTH.getFieldName() + pos, conf)); + } + + private static void appendCountNulls(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) { + rewrittenQueryBuilder.append("CAST(count(CASE WHEN ") + .append(columnName) + .append(" IS NULL THEN 1 ELSE null END) AS bigint) AS ") + .append(unparseIdentifier(ColumnStatsField.COUNT_NULLS.getFieldName() + pos, conf)); + } + + private static void appendNDV(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) throws SemanticException { + rewrittenQueryBuilder.append("COALESCE(NDV_COMPUTE_BIT_VECTOR("); + appendBitVector(rewrittenQueryBuilder, conf, columnName); + rewrittenQueryBuilder.append("), 0) AS ") + .append(unparseIdentifier(ColumnStatsField.NDV.getFieldName() + pos, conf)); + } + + private static void appendBitVector(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName, int pos) throws SemanticException { + appendBitVector(rewrittenQueryBuilder, conf, columnName); + rewrittenQueryBuilder.append(" AS ") + .append(unparseIdentifier(ColumnStatsField.BITVECTOR.getFieldName() + pos, conf)); + } + + private static void appendBitVector(StringBuilder rewrittenQueryBuilder, HiveConf conf, + String columnName) throws SemanticException { + String func = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ALGO).toLowerCase(); + rewrittenQueryBuilder.append("compute_bit_vector(") + .append(columnName) + 
        .append(", '")
+        .append(func)
+        .append("'");
+    if ("fm".equals(func)) {
+      int numBitVectors;
+      try {
+        numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
+      } catch (Exception e) {
+        throw new SemanticException(e.getMessage());
+      }
+      rewrittenQueryBuilder.append(", ")
+          .append(numBitVectors);
+    }
+    rewrittenQueryBuilder.append(")");
+  }
+
+  private static void appendCountTrues(StringBuilder rewrittenQueryBuilder, HiveConf conf,
+      String columnName, int pos) {
+    rewrittenQueryBuilder.append("CAST(count(CASE WHEN ")
+        .append(columnName)
+        .append(" IS TRUE THEN 1 ELSE 0 END) AS bigint) AS ")
+        .append(unparseIdentifier(ColumnStatsField.COUNT_TRUES.getFieldName() + pos, conf));
+  }
+
+  private static void appendCountFalses(StringBuilder rewrittenQueryBuilder, HiveConf conf,
+      String columnName, int pos) {
+    rewrittenQueryBuilder.append("CAST(count(CASE WHEN ")
+        .append(columnName)
+        .append(" IS FALSE THEN 1 ELSE 0 END) AS bigint) AS ")
+        .append(unparseIdentifier(ColumnStatsField.COUNT_FALSES.getFieldName() + pos, conf));
+  }
+
   private ASTNode genRewrittenTree(String rewrittenQuery) throws SemanticException {
     // Parse the rewritten query string
     try {
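For a non-partitioned table t with a single bigint column c and hive.stats.ndv.algo=hll, the
rewritten statement produced by the code above looks roughly as follows (t and c are
illustrative names; identifier quoting via unparseIdentifier is omitted for readability):

    SELECT 'LONG' AS columntype0,
           CAST(min(c) AS bigint) AS min0,
           CAST(max(c) AS bigint) AS max0,
           CAST(count(CASE WHEN c IS NULL THEN 1 ELSE null END) AS bigint) AS countnulls0,
           COALESCE(NDV_COMPUTE_BIT_VECTOR(compute_bit_vector(c, 'hll')), 0) AS numdistinctvalues0,
           compute_bit_vector(c, 'hll') AS ndvbitvector0
    FROM t

Previously the whole projection was a single compute_stats(c, 'hll') call.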
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
index 1a339633d4..a746b07f62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.stats;
 
+import com.google.common.collect.ImmutableList;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -41,6 +42,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -49,11 +51,14 @@
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+
 public class ColStatsProcessor implements IStatsProcessor {
   private static transient final Logger LOG = LoggerFactory.getLogger(ColStatsProcessor.class);
 
@@ -87,10 +92,8 @@ public int process(Hive db, Table tbl) throws Exception {
     return persistColumnStats(db, tbl);
   }
 
-  private List<ColumnStatisticsObj> constructColumnStatsFromPackedRows(Table tbl1) throws HiveException, MetaException, IOException {
-
-    Table tbl = tbl1;
-
+  private List<ColumnStatisticsObj> constructColumnStatsFromPackedRows(Table tbl)
+      throws HiveException, MetaException, IOException {
     String partName = null;
     List<String> colName = colStatDesc.getColName();
     List<String> colType = colStatDesc.getColType();
@@ -103,22 +106,24 @@ public int process(Hive db, Table tbl) throws Exception {
       throw new HiveException("Unexpected object type encountered while unpacking row");
     }
 
-    List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
+    List<ColumnStatisticsObj> statsObjs = new ArrayList<>();
     StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
     List<? extends StructField> fields = soi.getAllStructFieldRefs();
     List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
+    List<FieldSchema> colSchema = tbl.getCols();
     List<FieldSchema> partColSchema = tbl.getPartCols();
+
     // Partition columns are appended at end, we only care about stats column
-    int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
-    assert list != null;
-    for (int i = 0; i < numOfStatCols; i++) {
-      StructField structField = fields.get(i);
+    int pos = 0;
+    for (int i = 0; i < colSchema.size(); i++) {
       String columnName = colName.get(i);
       String columnType = colType.get(i);
-      Object values = list.get(i);
+      PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(columnType);
+      List<ColumnStatsField> columnStatsFields = ColumnStatsType.getColumnStats(typeInfo);
       try {
-        ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveStruct(columnName, columnType, structField, values);
+        ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveColumnStatistics(
+            columnName, columnType, columnStatsFields, pos, soi, list);
         statsObjs.add(statObj);
       } catch (Exception e) {
         if (isStatsReliable) {
@@ -127,15 +132,16 @@ public int process(Hive db, Table tbl) throws Exception {
           LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e);
         }
       }
+      pos += columnStatsFields.size();
     }
 
     if (!statsObjs.isEmpty()) {
-      if (!isTblLevel) {
-        List<String> partVals = new ArrayList<String>();
+      List<String> partVals = new ArrayList<>();
       // Iterate over partition columns to figure out partition name
-      for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
-        Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i));
+      for (int i = pos; i < pos + partColSchema.size(); i++) {
+        Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector())
+            .getPrimitiveJavaObject(list.get(i));
         partVals.add(partVal == null ? // could be null for default partition
             this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
       }
@@ -196,4 +202,148 @@ public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaExce
   public void setDpPartSpecs(Collection<Partition> dpPartSpecs) {
   }
 
+  /**
+   * Enumeration of column stats fields that can currently
+   * be computed. Each one has a field name associated.
+   */
+  public enum ColumnStatsField {
+    COLUMN_TYPE("columntype"),
+    COUNT_TRUES("counttrues"),
+    COUNT_FALSES("countfalses"),
+    COUNT_NULLS("countnulls"),
+    MIN("min"),
+    MAX("max"),
+    NDV("numdistinctvalues"),
+    BITVECTOR("ndvbitvector"),
+    MAX_LENGTH("maxlength"),
+    AVG_LENGTH("avglength");
+
+    private final String fieldName;
+
+    ColumnStatsField(String fieldName) {
+      this.fieldName = fieldName;
+    }
+
+    public String getFieldName() {
+      return fieldName;
+    }
+  }
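+
+  // Added note: the field names above match the struct field names that the deprecated
+  // compute_stats UDAF produced and that ColumnStatisticsObjTranslator previously
+  // dispatched on as plain strings.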
+
+  /**
+   * Enumeration of column stats type. Each Hive primitive type maps into a single
+   * column stats type, e.g., byte, short, int, and bigint types map into long
+   * column type. Each column stats type has _n_ column stats fields associated
+   * with it.
+   */
+  public enum ColumnStatsType {
+    BOOLEAN(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.COUNT_TRUES,
+            ColumnStatsField.COUNT_FALSES,
+            ColumnStatsField.COUNT_NULLS)),
+    LONG(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.MIN,
+            ColumnStatsField.MAX,
+            ColumnStatsField.COUNT_NULLS,
+            ColumnStatsField.NDV,
+            ColumnStatsField.BITVECTOR)),
+    DOUBLE(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.MIN,
+            ColumnStatsField.MAX,
+            ColumnStatsField.COUNT_NULLS,
+            ColumnStatsField.NDV,
+            ColumnStatsField.BITVECTOR)),
+    STRING(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.MAX_LENGTH,
+            ColumnStatsField.AVG_LENGTH,
+            ColumnStatsField.COUNT_NULLS,
+            ColumnStatsField.NDV,
+            ColumnStatsField.BITVECTOR)),
+    BINARY(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.MAX_LENGTH,
+            ColumnStatsField.AVG_LENGTH,
+            ColumnStatsField.COUNT_NULLS)),
+    DECIMAL(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.MIN,
+            ColumnStatsField.MAX,
+            ColumnStatsField.COUNT_NULLS,
+            ColumnStatsField.NDV,
+            ColumnStatsField.BITVECTOR)),
+    DATE(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.MIN,
+            ColumnStatsField.MAX,
+            ColumnStatsField.COUNT_NULLS,
+            ColumnStatsField.NDV,
+            ColumnStatsField.BITVECTOR)),
+    TIMESTAMP(
+        ImmutableList.of(
+            ColumnStatsField.COLUMN_TYPE,
+            ColumnStatsField.MIN,
+            ColumnStatsField.MAX,
+            ColumnStatsField.COUNT_NULLS,
+            ColumnStatsField.NDV,
+            ColumnStatsField.BITVECTOR));
+
+    private final List<ColumnStatsField> columnStats;
+
+    ColumnStatsType(List<ColumnStatsField> columnStats) {
+      this.columnStats = columnStats;
+    }
+
+    public List<ColumnStatsField> getColumnStats() {
+      return columnStats;
+    }
+
+    public static ColumnStatsType getColumnStatsType(PrimitiveTypeInfo typeInfo)
+        throws SemanticException {
+      switch (typeInfo.getPrimitiveCategory()) {
+      case BOOLEAN:
+        return BOOLEAN;
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case TIMESTAMPLOCALTZ:
+        return LONG;
+      case FLOAT:
+      case DOUBLE:
+        return DOUBLE;
+      case DECIMAL:
+        return DECIMAL;
+      case DATE:
+        return DATE;
+      case TIMESTAMP:
+        return TIMESTAMP;
+      case STRING:
+      case CHAR:
+      case VARCHAR:
+        return STRING;
+      case BINARY:
+        return BINARY;
+      default:
+        throw new SemanticException("Not supported type " +
+            typeInfo.getTypeName() + " for statistics computation");
+      }
+    }
+
+    public static List<ColumnStatsField> getColumnStats(PrimitiveTypeInfo typeInfo)
+        throws SemanticException {
+      return getColumnStatsType(typeInfo).getColumnStats();
+    }
+
+  }
 }
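To make the positional contract between the rewritten query and the stats processor
concrete, here is a small self-contained sketch (illustrative only, not part of this
change; the class name is hypothetical and it only assumes the two enums added above):

    import java.util.List;
    import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsField;
    import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType;
    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class StatsLayoutDemo {
      public static void main(String[] args) throws Exception {
        // Mirrors tbl.getCols() order: id bigint, name string.
        String[] types = {"bigint", "string"};
        int pos = 0;
        for (String type : types) {
          PrimitiveTypeInfo ti =
              (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(type);
          List<ColumnStatsField> fields = ColumnStatsType.getColumnStats(ti);
          // Same advancing logic as constructColumnStatsFromPackedRows.
          System.out.println(type + " -> struct positions [" + pos + ", "
              + (pos + fields.size()) + ")");
          pos += fields.size();
        }
        // Partition columns, when present, start at the final value of pos.
      }
    }

Here bigint occupies positions 0 through 5 (six LONG fields) and string positions 6
through 11, so the translator reads each column's slice via
readHiveColumnStatistics(columnName, columnType, fields, pos, soi, list).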
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java
index e6926d3d18..50ce0dd4a7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java
@@ -37,6 +37,8 @@
 import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
 import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsField;
+import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType;
 import org.apache.hadoop.hive.serde2.io.DateWritableV2;
 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -50,40 +52,49 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
+
 public class ColumnStatisticsObjTranslator {
-  private static transient final Logger LOG = LoggerFactory
-      .getLogger(ColumnStatisticsObjTranslator.class);
+  public static ColumnStatisticsObj readHiveColumnStatistics(String columnName, String columnType,
+      List<ColumnStatsField> columnStatsFields, int start, StructObjectInspector soi, List<Object> list)
+      throws HiveException {
+    List<? extends StructField> fields = soi.getAllStructFieldRefs();
 
-  public static ColumnStatisticsObj readHiveStruct(String columnName, String columnType, StructField structField, Object values)
-      throws HiveException
-  {
-    // Get the field objectInspector, fieldName and the field object.
-    ObjectInspector foi = structField.getFieldObjectInspector();
-    Object f = values;
-    String fieldName = structField.getFieldName();
     ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
     statsObj.setColName(columnName);
     statsObj.setColType(columnType);
-    try {
-      unpackStructObject(foi, f, fieldName, statsObj);
-      return statsObj;
-    } catch (Exception e) {
-      throw new HiveException("error calculating stats for column:" + structField.getFieldName(), e);
+
+    int end = start + columnStatsFields.size();
+    for (int i = start; i < end; i++) {
+      // Get the field objectInspector, fieldName and the field object.
+      ObjectInspector foi = fields.get(i).getFieldObjectInspector();
+      Object f = (list == null ? null : list.get(i));
+      try {
+        unpackPrimitiveObject(foi, f, columnStatsFields.get(i - start), statsObj);
+      } catch (Exception e) {
+        throw new HiveException("Error calculating statistics for column:" + columnName, e);
+      }
     }
+
+    return statsObj;
   }
 
-  private static void unpackBooleanStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) {
+  private static void unpackBooleanStats(ObjectInspector oi, Object o,
+      ColumnStatsField csf, ColumnStatisticsObj statsObj) {
     long v = ((LongObjectInspector) oi).get(o);
-    if (fName.equals("counttrues")) {
+    switch (csf) {
+    case COUNT_TRUES:
       statsObj.getStatsData().getBooleanStats().setNumTrues(v);
-    } else if (fName.equals("countfalses")) {
+      break;
+    case COUNT_FALSES:
       statsObj.getStatsData().getBooleanStats().setNumFalses(v);
-    } else if (fName.equals("countnulls")) {
+      break;
+    case COUNT_NULLS:
       statsObj.getStatsData().getBooleanStats().setNumNulls(v);
+      break;
+    default:
+      throw new RuntimeException("Unsupported column stat for BOOLEAN : " + csf);
     }
   }
 
@@ -91,51 +102,67 @@ private static void unpackBooleanStats(ObjectInspector oi, Object o, String fNam
   static class UnsupportedDoubleException extends Exception {
   }
 
-  private static void unpackDoubleStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException {
-    if (fName.equals("countnulls")) {
-      long v = ((LongObjectInspector) oi).get(o);
-      statsObj.getStatsData().getDoubleStats().setNumNulls(v);
-    } else if (fName.equals("numdistinctvalues")) {
-      long v = ((LongObjectInspector) oi).get(o);
-      statsObj.getStatsData().getDoubleStats().setNumDVs(v);
-    } else if (fName.equals("max")) {
-      double d = ((DoubleObjectInspector) oi).get(o);
-      if (Double.isInfinite(d) || Double.isNaN(d)) {
+  private static void unpackDoubleStats(ObjectInspector oi, Object o,
+      ColumnStatsField csf, ColumnStatisticsObj statsObj) throws
UnsupportedDoubleException { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumNulls(cn); + break; + case MIN: + double min = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(min) || Double.isNaN(min)) { throw new UnsupportedDoubleException(); } - statsObj.getStatsData().getDoubleStats().setHighValue(d); - } else if (fName.equals("min")) { - double d = ((DoubleObjectInspector) oi).get(o); - if (Double.isInfinite(d) || Double.isNaN(d)) { + statsObj.getStatsData().getDoubleStats().setLowValue(min); + break; + case MAX: + double max = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(max) || Double.isNaN(max)) { throw new UnsupportedDoubleException(); } - statsObj.getStatsData().getDoubleStats().setLowValue(d); - } else if (fName.equals("ndvbitvector")) { + statsObj.getStatsData().getDoubleStats().setHighValue(max); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDoubleStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DOUBLE : " + csf); } } - private static void unpackDecimalStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumDVs(v); - } else if (fName.equals("max")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d)); - } else if (fName.equals("min")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); - } else if (fName.equals("ndvbitvector")) { + private static void unpackDecimalStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumNulls(cn); + break; + case MIN: + HiveDecimal min = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(min)); + break; + case MAX: + HiveDecimal max = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(max)); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDecimalStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DECIMAL : " + csf); } } @@ -143,141 +170,182 @@ private static Decimal convertToThriftDecimal(HiveDecimal d) { return DecimalUtils.getDecimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()); } - private static 
void unpackLongStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumDVs(v); - } else if (fName.equals("max")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setHighValue(v); - } else if (fName.equals("min")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setLowValue(v); - } else if (fName.equals("ndvbitvector")) { + private static void unpackLongStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setNumNulls(cn); + break; + case MIN: + long min = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setLowValue(min); + break; + case MAX: + long max = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setHighValue(max); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getLongStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for LONG : " + csf); } } - private static void unpackStringStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumDVs(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setMaxColLen(v); - } else if (fName.equals("ndvbitvector")) { + private static void unpackStringStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumNulls(cn); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getStringStats().setBitVectors(buf); - ; + break; + case MAX_LENGTH: + long max = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setMaxColLen(max); + break; + case AVG_LENGTH: + double avg = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setAvgColLen(avg); + break; + default: + throw new RuntimeException("Unsupported column stat for STRING : " + csf); } } - private static void unpackBinaryStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { 
- if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setNumNulls(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { + private static void unpackBinaryStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setNumNulls(cn); + break; + case AVG_LENGTH: + double avg = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setAvgColLen(avg); + break; + case MAX_LENGTH: long v = ((LongObjectInspector) oi).get(o); statsObj.getStatsData().getBinaryStats().setMaxColLen(v); + break; + default: + throw new RuntimeException("Unsupported column stat for BINARY : " + csf); } } - private static void unpackDateStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumDVs(v); - } else if (fName.equals("max")) { - DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setHighValue(new Date(v.getDays())); - } else if (fName.equals("min")) { - DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); - } else if (fName.equals("ndvbitvector")) { + private static void unpackDateStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumNulls(cn); + break; + case MIN: + DateWritableV2 min = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setLowValue(new Date(min.getDays())); + break; + case MAX: + DateWritableV2 max = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setHighValue(new Date(max.getDays())); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDateStats().setBitVectors(buf); - ; + break; + default: + throw new RuntimeException("Unsupported column stat for DATE : " + csf); } } - private static void unpackTimestampStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getTimestampStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getTimestampStats().setNumDVs(v); - } else if (fName.equals("max")) { - TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getTimestampStats().setHighValue(new Timestamp(v.getSeconds())); - } else if 
(fName.equals("min")) { - TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getTimestampStats().setLowValue(new Timestamp(v.getSeconds())); - } else if (fName.equals("ndvbitvector")) { + private static void unpackTimestampStats(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) { + switch (csf) { + case COUNT_NULLS: + long cn = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumNulls(cn); + break; + case MIN: + TimestampWritableV2 min = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getTimestampStats().setLowValue(new Timestamp(min.getSeconds())); + break; + case MAX: + TimestampWritableV2 max = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getTimestampStats().setHighValue(new Timestamp(max.getSeconds())); + break; + case NDV: + long ndv = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumDVs(ndv); + break; + case BITVECTOR: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); statsObj.getStatsData().getTimestampStats().setBitVectors(buf); + break; + default: + throw new RuntimeException("Unsupported column stat for TIMESTAMP : " + csf); } } - private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { + private static void unpackPrimitiveObject(ObjectInspector oi, Object o, + ColumnStatsField csf, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { if (o == null) { return; } // First infer the type of object - if (fieldName.equals("columntype")) { + if (csf == ColumnStatsField.COLUMN_TYPE) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); ColumnStatisticsData statsData = new ColumnStatisticsData(); - if (s.equalsIgnoreCase("long")) { + if (s.equalsIgnoreCase(ColumnStatsType.LONG.toString())) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); statsData.setLongStats(longStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("double")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DOUBLE.toString())) { DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); statsData.setDoubleStats(doubleStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("string")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.STRING.toString())) { StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); statsData.setStringStats(stringStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("boolean")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.BOOLEAN.toString())) { BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); statsData.setBooleanStats(booleanStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("binary")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.BINARY.toString())) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("decimal")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DECIMAL.toString())) { DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); 
statsData.setDecimalStats(decimalStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("date")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.DATE.toString())) { DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); statsData.setDateStats(dateStats); statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("timestamp")) { + } else if (s.equalsIgnoreCase(ColumnStatsType.TIMESTAMP.toString())) { TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); statsData.setTimestampStats(timestampStats); statsObj.setStatsData(statsData); @@ -285,44 +353,21 @@ private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String f } else { // invoke the right unpack method depending on data type of the column if (statsObj.getStatsData().isSetBooleanStats()) { - unpackBooleanStats(oi, o, fieldName, statsObj); + unpackBooleanStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetLongStats()) { - unpackLongStats(oi, o, fieldName, statsObj); + unpackLongStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDoubleStats()) { - unpackDoubleStats(oi, o, fieldName, statsObj); + unpackDoubleStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetStringStats()) { - unpackStringStats(oi, o, fieldName, statsObj); + unpackStringStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetBinaryStats()) { - unpackBinaryStats(oi, o, fieldName, statsObj); + unpackBinaryStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDecimalStats()) { - unpackDecimalStats(oi, o, fieldName, statsObj); + unpackDecimalStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetDateStats()) { - unpackDateStats(oi, o, fieldName, statsObj); + unpackDateStats(oi, o, csf, statsObj); } else if (statsObj.getStatsData().isSetTimestampStats()) { - unpackTimestampStats(oi, o, fieldName, statsObj); - } - } - } - - private static void unpackStructObject(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj cStatsObj) throws UnsupportedDoubleException { - if (oi.getCategory() != ObjectInspector.Category.STRUCT) { - throw new RuntimeException("Invalid object datatype : " + oi.getCategory().toString()); - } - - StructObjectInspector soi = (StructObjectInspector) oi; - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(o); - - for (int i = 0; i < fields.size(); i++) { - // Get the field objectInspector, fieldName and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = (list == null ? null : list.get(i)); - String fieldName = fields.get(i).getFieldName(); - - if (foi.getCategory() == ObjectInspector.Category.PRIMITIVE) { - unpackPrimitiveObject(foi, f, fieldName, cStatsObj); - } else { - unpackStructObject(foi, f, fieldName, cStatsObj); + unpackTimestampStats(oi, o, csf, statsObj); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java new file mode 100644 index 0000000000..8bfca77b51 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeBitVector.java @@ -0,0 +1,561 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.stats.ColStatsProcessor.ColumnStatsType; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.BytesWritable; + +import static org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator; +import static org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator; + +/** + * GenericUDAFComputeBitVector. This UDAF replicates part of the functionality + * that was in GenericUDAFComputeStats previously, which is deprecated now. + * In particular, it will compute a bit vector using the algorithm provided + * as a parameter. The ndv_compute_bit_vector function can be used on top of + * it to extract an estimate of the ndv from it. 
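+ *
+ * <p>Illustrative usage (table and column names are examples, not taken from this patch):
+ * {@code SELECT ndv_compute_bit_vector(compute_bit_vector(c, 'hll')) FROM t} estimates the
+ * ndv with HyperLogLog; with the FM sketch the number of bit vectors must also be given,
+ * e.g. {@code compute_bit_vector(c, 'fm', 16)}.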
+ */ +@Description(name = "compute_bit_vector", + value = "_FUNC_(x) - Computes bit vector for NDV computation.") +public class GenericUDAFComputeBitVector extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { + if (parameters.length < 2 ) { + throw new UDFArgumentTypeException(parameters.length - 1, + "Exactly 2 (col + hll) or 3 (col + fm + #bitvectors) arguments are expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + + ColumnStatsType cst = ColumnStatsType.getColumnStatsType(((PrimitiveTypeInfo) parameters[0])); + switch (cst) { + case LONG: + return new GenericUDAFLongStatsEvaluator(); + case DOUBLE: + return new GenericUDAFDoubleStatsEvaluator(); + case STRING: + return new GenericUDAFStringStatsEvaluator(); + case DECIMAL: + return new GenericUDAFDecimalStatsEvaluator(); + case DATE: + return new GenericUDAFDateStatsEvaluator(); + case TIMESTAMP: + return new GenericUDAFTimestampStatsEvaluator(); + default: + throw new UDFArgumentTypeException(0, + "Type argument " + parameters[0].getTypeName() + " not valid"); + } + } + + public static abstract class GenericUDAFNumericStatsEvaluator + extends GenericUDAFEvaluator { + + protected final static int MAX_BIT_VECTORS = 1024; + + /* Object Inspector corresponding to the input parameter. + */ + protected transient PrimitiveObjectInspector inputOI; + protected transient PrimitiveObjectInspector funcOI; + protected transient PrimitiveObjectInspector numVectorsOI; + + /* Object Inspector corresponding to the bitvector. + */ + protected transient BinaryObjectInspector ndvFieldOI; + + /* Partial aggregation result returned by TerminatePartial. + */ + protected transient BytesWritable partialResult; + + /* Output of final result of the aggregation. + */ + protected transient BytesWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // initialize input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } + } else { + ndvFieldOI = (BinaryObjectInspector) parameters[0]; + } + + // initialize output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + partialResult = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + result = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + NumericStatsAgg myagg = (NumericStatsAgg) agg; + + if (myagg.numDV == null) { + int numVectors = 0; + // func may be null when GBY op is closing. + // see mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainuser_3.q + // original behavior is to create FMSketch + String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString( + parameters[1], funcOI); + if (parameters.length == 3) { + numVectors = parameters[2] == null ? 
0 : PrimitiveObjectInspectorUtils.getInt( + parameters[2], numVectorsOI); + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + " is " + + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + } + } + myagg.initNDVEstimator(func, numVectors); + } + + if (parameters[0] != null) { + myagg.update(parameters[0], inputOI); + } + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + NumericStatsAgg myagg = (NumericStatsAgg) agg; + // Merge numDistinctValue Estimators + byte[] buf = ndvFieldOI.getPrimitiveJavaObject(partial); + if (buf != null && buf.length != 0) { + if (myagg.numDV == null) { + myagg.numDV = getNumDistinctValueEstimator(buf); + } else { + myagg.numDV.mergeEstimators(getNumDistinctValueEstimator(buf)); + } + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + return ((NumericStatsAgg) agg).serializePartial(partialResult); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((NumericStatsAgg) agg).serialize(result); + } + + public abstract class NumericStatsAgg extends AbstractAggregationBuffer { + + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ + + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return (numDV == null) ? + lengthFor(model) : numDV.lengthFor(model); + } + + protected void initNDVEstimator(String func, int numBitVectors) { + numDV = getEmptyNumDistinctValueEstimator(func, numBitVectors); + } + + protected abstract void update(Object p, PrimitiveObjectInspector inputOI); + + protected Object serialize(BytesWritable result) { + if (numDV != null) { + byte[] buf = numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + + protected Object serializePartial(BytesWritable result) { + if (numDV != null) { + // Serialize numDistinctValue Estimator + byte[] buf = numDV.serialize(); + result.set(buf, 0, buf.length); + } + return result; + } + + public void reset() throws HiveException { + numDV = null; + } + }; + } + + /** + * GenericUDAFLongStatsEvaluator. + * + */ + public static class GenericUDAFLongStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class LongStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + long v = PrimitiveObjectInspectorUtils.getLong(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new LongStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFDoubleStatsEvaluator. 
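+   * Input values are read through PrimitiveObjectInspectorUtils.getDouble, so FLOAT
+   * columns (which map to the DOUBLE column stats type) are widened before being fed
+   * to the estimator.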
+ */ + public static class GenericUDAFDoubleStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DoubleStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DoubleStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + public static class GenericUDAFDecimalStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DecimalStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.lengthOfDecimal() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + HiveDecimal v = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI); + numDV.addToEstimator(v); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DecimalStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFDateStatsEvaluator. + */ + public static class GenericUDAFDateStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class DateStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + // DateWritableV2 is mutable, DateStatsAgg needs its own copy + DateWritableV2 v = new DateWritableV2((DateWritableV2) inputOI.getPrimitiveWritableObject(p)); + numDV.addToEstimator(v.getDays()); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DateStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFTimestampStatsEvaluator. 
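+   * The incoming TimestampWritableV2 is copied before use, since the writable is
+   * mutable, and the estimator is fed the timestamp's epoch seconds.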
+ */ + public static class GenericUDAFTimestampStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + @AggregationType(estimable = true) + public class TimestampStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + // TimestampWritableV2 is mutable, TimestampStatsAgg needs its own copy + TimestampWritableV2 v = new TimestampWritableV2((TimestampWritableV2) inputOI.getPrimitiveWritableObject(p)); + numDV.addToEstimator(v.getSeconds()); + } + }; + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new TimestampStatsAgg(); + reset(result); + return result; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset(); + } + } + + /** + * GenericUDAFStringStatsEvaluator. + */ + public static class GenericUDAFStringStatsEvaluator extends GenericUDAFEvaluator { + + private final static int MAX_BIT_VECTORS = 1024; + + /* Object Inspector corresponding to the input parameter. + */ + private transient PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector funcOI; + private transient PrimitiveObjectInspector numVectorsOI; + + /* Object Inspector corresponding to the bitvector + */ + private transient BinaryObjectInspector ndvFieldOI; + + /* Partial aggregation result returned by TerminatePartial. + */ + private transient BytesWritable partialResult; + + /* Output of final result of the aggregation + */ + private transient BytesWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // initialize input + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + inputOI = (PrimitiveObjectInspector) parameters[0]; + funcOI = (PrimitiveObjectInspector) parameters[1]; + if (parameters.length > 2) { + numVectorsOI = (PrimitiveObjectInspector) parameters[2]; + } + } else { + ndvFieldOI = (BinaryObjectInspector) parameters[0]; + } + + // initialize output + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + partialResult = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + result = new BytesWritable(); + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } + } + + @AggregationType(estimable = true) + public static class StringStatsAgg extends AbstractAggregationBuffer { + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ + public boolean firstItem; + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return (numDV == null) ? 
+            lengthFor(model) : numDV.lengthFor(model);
+      }
+    };
+
+    @Override
+    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+      StringStatsAgg result = new StringStatsAgg();
+      reset(result);
+      return result;
+    }
+
+    public void initNDVEstimator(StringStatsAgg aggBuffer, String func, int numBitVectors) {
+      aggBuffer.numDV = getEmptyNumDistinctValueEstimator(func, numBitVectors);
+      aggBuffer.numDV.reset();
+    }
+
+    @Override
+    public void reset(AggregationBuffer agg) throws HiveException {
+      StringStatsAgg myagg = (StringStatsAgg) agg;
+      myagg.firstItem = true;
+    }
+
+    @Override
+    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+      Object p = parameters[0];
+      StringStatsAgg myagg = (StringStatsAgg) agg;
+
+      if (myagg.firstItem) {
+        int numVectors = 0;
+        String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString(
+            parameters[1], funcOI);
+        if (parameters.length > 2) {
+          numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[2], numVectorsOI);
+          if (numVectors > MAX_BIT_VECTORS) {
+            throw new HiveException("The maximum allowed value for number of bit vectors is "
+                + MAX_BIT_VECTORS + ", but " + numVectors + " bit vectors were passed");
+          }
+        }
+
+        initNDVEstimator(myagg, func, numVectors);
+        myagg.firstItem = false;
+      }
+
+      String v = PrimitiveObjectInspectorUtils.getString(p, inputOI);
+      if (v != null) {
+        // Add string value to NumDistinctValue Estimator
+        myagg.numDV.addToEstimator(v);
+      }
+    }
+
+    @Override
+    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+      if (partial != null) {
+        StringStatsAgg myagg = (StringStatsAgg) agg;
+
+        // Merge numDistinctValue Estimators
+        byte[] buf = ndvFieldOI.getPrimitiveJavaObject(partial);
+
+        if (buf != null && buf.length != 0) {
+          if (myagg.numDV == null) {
+            myagg.numDV = getNumDistinctValueEstimator(buf);
+          } else {
+            myagg.numDV.mergeEstimators(getNumDistinctValueEstimator(buf));
+          }
+        }
+      }
+    }
+
+    @Override
+    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+      StringStatsAgg myagg = (StringStatsAgg) agg;
+      // Serialize numDistinctValue Estimator
+      if (myagg.numDV != null) {
+        byte[] buf = myagg.numDV.serialize();
+        partialResult.set(buf, 0, buf.length);
+      }
+      return partialResult;
+    }
+
+    @Override
+    public Object terminate(AggregationBuffer agg) throws HiveException {
+      StringStatsAgg myagg = (StringStatsAgg) agg;
+      if (myagg.numDV != null) {
+        byte[] buf = myagg.numDV.serialize();
+        result.set(buf, 0, buf.length);
+      }
+      return result;
+    }
+  }
+
+  @InterfaceAudience.LimitedPrivate(value = { "Hive" })
+  static int lengthFor(JavaDataModel model) {
+    int length = model.object();
+    // HiveConf hive.stats.ndv.error default produces 16
+    length += model.array() * 3; // three array
+    length += model.primitive1() * 16 * 2; // two int array
+    length += (model.object() + model.array() + model.primitive1() + model.primitive2())
+        * 16; // bitset array
+    return length;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index 4e7c598155..2b5f90e2c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -61,6 +61,7 @@
  */
 @Description(name = "compute_stats",
     value = "_FUNC_(x) - Returns the statistical summary of a set of primitive type values.")
+@Deprecated
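+// Superseded by the column statistics rewrite, which now emits plain min/max/count/avg
+// aggregates plus compute_bit_vector and ndv_compute_bit_vector (see the q.out plan
+// changes below).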
 public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {

   static final Logger LOG = LoggerFactory.getLogger(GenericUDAFComputeStats.class.getName());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java
new file mode 100644
index 0000000000..9f9d8eb044
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNDVComputeBitVector.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.LongWritable;
+
+
+/**
+ * GenericUDFNDVComputeBitVector. The ndv_compute_bit_vector function can be used on top of
+ * the compute_bit_vector aggregate function to extract an estimate of the NDV from it.
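+ *
+ * For example (query shape inferred from the q.out plan updates in this patch, not an
+ * excerpt from them):
+ * <pre>
+ *   SELECT ndv_compute_bit_vector(compute_bit_vector(key, 'hll')) FROM src;
+ * </pre>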
+ */
+@Description(name = "ndv_compute_bit_vector",
+    value = "_FUNC_(x) - Extracts NDV from bit vector.")
+public class GenericUDFNDVComputeBitVector extends GenericUDF {
+
+  protected transient BinaryObjectInspector inputOI;
+  protected final LongWritable result = new LongWritable(0);
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments[0].getCategory() != Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(0,
+          "ndv_compute_bit_vector input only takes primitive types, got " + arguments[0].getTypeName());
+    }
+    PrimitiveObjectInspector objectInspector = (PrimitiveObjectInspector) arguments[0];
+    if (objectInspector.getPrimitiveCategory() != PrimitiveCategory.BINARY) {
+      throw new UDFArgumentTypeException(0,
+          "ndv_compute_bit_vector input only takes BINARY type, got " + arguments[0].getTypeName());
+    }
+    inputOI = (BinaryObjectInspector) arguments[0];
+    return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    if (arguments[0] == null) {
+      return null;
+    }
+    Object input = arguments[0].get();
+    if (input == null) {
+      return null;
+    }
+
+    byte[] buf = inputOI.getPrimitiveJavaObject(input);
+    if (buf == null || buf.length == 0) {
+      return null;
+    }
+    NumDistinctValueEstimator numDV =
+        NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(buf);
+    result.set(numDV.estimateNumDistinctValues());
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString("ndv_compute_bit_vector", children, ",");
+  }
+}
diff --git a/ql/src/test/results/clientpositive/llap/hll.q.out b/ql/src/test/results/clientpositive/llap/hll.q.out
index 0f2e13b1b2..501a6bd745 100644
--- a/ql/src/test/results/clientpositive/llap/hll.q.out
+++ b/ql/src/test/results/clientpositive/llap/hll.q.out
@@ -47,33 +47,37 @@ STAGE PLANS:
outputColumnNames: key Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Stats Work @@ -161,33 +165,37 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(key, 'hll') + aggregations: min(key), max(key), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 
Stats Work diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out index 9af6567987..654460c01b 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_cluster.q.out @@ -85,31 +85,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(length(col1)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(length(col2)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -497,33 +501,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -842,31 +850,35 @@ STAGE PLANS: 
outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1101,31 +1113,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 55 Data size: 9955 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), 
avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1251,31 +1267,35 @@ STAGE PLANS: outputColumnNames: value, key, tes"t, te*#"s"t Statistics: Num rows: 55 Data size: 10835 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll'), compute_stats(tes"t, 'hll'), compute_stats(te*#"s"t, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is 
null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(tes"t), max(tes"t), count(CASE WHEN (tes"t is null) THEN (1) ELSE (null) END), compute_bit_vector(tes"t, 'hll'), min(te*#"s"t), max(te*#"s"t), count(CASE WHEN (te*#"s"t is null) THEN (1) ELSE (null) END), compute_bit_vector(te*#"s"t, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: binary), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'DOUBLE' (type: string), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary), 'DOUBLE' (type: string), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), COALESCE(ndv_compute_bit_vector(_col15),0) (type: bigint), _col15 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out index 2d11b3fca1..b1c8cb6ec9 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), 
_col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -638,31 +642,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out index 13d7f5a756..848a5485f4 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out @@ -183,31 +183,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll'), min(col3), max(col3), count(CASE WHEN (col3 is null) THEN (1) ELSE (null) END), compute_bit_vector(col3, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) 
(type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -832,16 +836,16 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Filter Operator predicate: ((_col0 = _col4) and (_col1 = _col5)) (type: boolean) Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE @@ -860,17 +864,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: 
COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1223,31 +1231,35 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), 
max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1524,31 +1536,35 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group 
By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1844,16 +1860,16 @@ STAGE PLANS: outputColumnNames: a, c, _c2 Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll'), min(_c2), max(_c2), count(CASE WHEN (_c2 is null) THEN (1) ELSE (null) END), compute_bit_vector(_c2, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 
(type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: binary) Filter Operator predicate: ((_col0 = _col4) and (_col1 = _col5)) (type: boolean) Statistics: Num rows: 1 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE @@ -1872,17 +1888,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'LONG' (type: string), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out index e2fff711cd..2234f6ac6a 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out @@ -300,31 +300,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), 
count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -575,31 +579,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 
Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -806,31 +814,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.8 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1041,31 +1053,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out index 0a106ba0a3..52eebe598e 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 
(type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -638,31 +642,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), 
compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out index 6cce2a45f7..842e946440 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out @@ -179,31 +179,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: min(col1), max(col1), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), min(col2), max(col2), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -762,31 +766,35 @@ STAGE PLANS: outputColumnNames: a, c Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll') + aggregations: min(a), max(a), count(CASE WHEN (a is null) THEN (1) ELSE (null) END), compute_bit_vector(a, 'hll'), min(c), max(c), count(CASE WHEN (c is null) THEN (1) ELSE (null) END), compute_bit_vector(c, 'hll') minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), _col7 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num 
rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(10,2)), _col5 (type: decimal(10,2)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out index 5961735f29..274e33cddd 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_distribute_sort.q.out @@ -85,31 +85,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(length(col1)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(length(col2)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -363,31 +367,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(length(col1)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(length(col2)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -668,33 +676,37 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + 
Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -889,31 +901,35 @@ STAGE PLANS: outputColumnNames: col1, col2 Statistics: Num rows: 55 Data size: 9900 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(length(col1)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(length(col2)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') minReductionHashAggr: 0.9818182 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), 
COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1242,31 +1258,35 @@ STAGE PLANS: outputColumnNames: value, key Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') minReductionHashAggr: 0.9444444 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: bigint), _col3 (type: binary), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col4,0)) (type: bigint), COALESCE(_col5,0) (type: double), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) 
(type: bigint), _col7 (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out index 25c5aedc12..d902842168 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partition_cluster.q.out @@ -86,35 +86,35 @@ STAGE PLANS: outputColumnNames: col1, col2, col3 Statistics: Num rows: 55 Data size: 10230 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + aggregations: max(length(col1)), avg(length(col1)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll'), max(length(col2)), avg(length(col2)), count(CASE WHEN (col2 is null) THEN (1) ELSE (null) END), compute_bit_vector(col2, 'hll') keys: col3 (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), 
COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -985,37 +985,37 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 18 Data size: 3384 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), 
UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1447,35 +1447,35 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 18 Data size: 3402 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 9 Data size: 4248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 9 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 
_col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 7992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 4860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1945,35 +1945,35 @@ STAGE PLANS: outputColumnNames: value, key, partkey Statistics: Num rows: 55 Data size: 10395 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 27 Data size: 12744 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 27 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), _col7 (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 27 
Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 23976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 27 Data size: 14580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2261,35 +2261,35 @@ STAGE PLANS: outputColumnNames: value, key, tes"t, te*#"s"t, partkey Statistics: Num rows: 55 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll'), compute_stats(tes"t, 'hll'), compute_stats(te*#"s"t, 'hll') + aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll'), max(length(key)), avg(length(key)), count(CASE WHEN (key is null) THEN (1) ELSE (null) END), compute_bit_vector(key, 'hll'), min(tes"t), max(tes"t), count(CASE WHEN (tes"t is null) THEN (1) ELSE (null) END), compute_bit_vector(tes"t, 'hll'), min(te*#"s"t), max(te*#"s"t), count(CASE WHEN (te*#"s"t is null) THEN (1) ELSE (null) END), compute_bit_vector(te*#"s"t, 'hll') keys: partkey (type: double) minReductionHashAggr: 0.5090909 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 46872 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 27 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 27 Data size: 46872 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 27 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct), _col7 (type: bigint), _col8 (type: binary), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), max(VALUE._col4), avg(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), min(VALUE._col8), max(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11), min(VALUE._col12), max(VALUE._col13), count(VALUE._col14), compute_bit_vector(VALUE._col15) keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 27 Data size: 47736 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 27 Data size: 18144 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - 
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out
index 85e22c791d..03bab3b823 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out
@@ -62,19 +62,19 @@ STAGE PLANS:
                   outputColumnNames: col1, col2
                   Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: compute_stats(col1, 'hll')
+                    aggregations: max(length(col1)), avg(length(col1)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll')
                     keys: col2 (type: string)
                     minReductionHashAggr: 0.5090909
                     mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col1 (type: struct)
+                      Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary)
                     Reduce Output Operator
                       key expressions: _col1 (type: string)
                       null sort order: a
@@ -88,18 +88,18 @@
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: compute_stats(VALUE._col0)
+                aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3)
                keys: KEY._col0 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 27 Data size: 6777 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  expressions: _col1 (type: struct), _col0 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                  expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -653,19 +653,19 @@ STAGE PLANS:
                   outputColumnNames: value, key
                   Statistics: Num rows: 18 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: compute_stats(value, 'hll')
+                    aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll')
                     keys: key (type: string)
                     minReductionHashAggr: 0.5
                     mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col1 (type: struct)
+                      Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary)
                     Reduce Output Operator
                       key expressions: _col1 (type: string)
                       null sort order: a
@@ -679,18 +679,18 @@
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: compute_stats(VALUE._col0)
+                aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 9 Data size: 2259 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  expressions: _col1 (type: struct), _col0 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                  expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1033,19 +1033,19 @@ STAGE PLANS:
                   outputColumnNames: value, key
                   Statistics: Num rows: 18 Data size: 3258 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: compute_stats(value, 'hll')
+                    aggregations: max(length(value)), avg(length(value)), count(CASE WHEN (value is null) THEN (1) ELSE (null) END), compute_bit_vector(value, 'hll')
                     keys: key (type: string)
                     minReductionHashAggr: 0.5
                     mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col1 (type: struct)
+                      Statistics: Num rows: 9 Data size: 2871 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary)
                     Reduce Output Operator
                       key expressions: _col1 (type: string)
                       null sort order: a
@@ -1057,18 +1057,18 @@
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: compute_stats(VALUE._col0)
+                aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 9 Data size: 2259 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  expressions: _col1 (type: struct), _col0 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                  expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 9 Data size: 4743 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 9 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
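The reducer-side Select Operator in each of these plans decodes the merged partials into the fixed layout the stats writer consumes: a type tag, the length stats, the null count, an NDV estimate obtained by applying the scalar ndv_compute_bit_vector UDF to the binary sketch, and the sketch itself. Roughly, for one string column (the column aliases and the intermediate relation here are hypothetical, not names from the patch):

    SELECT
      'STRING',                                      -- type tag for the stats writer
      CAST(COALESCE(max_len, 0) AS bigint),          -- UDFToLong(COALESCE(_colN,0)) in the plan
      COALESCE(avg_len, 0),                          -- avgColLen, 0 when all rows are NULL
      num_nulls,
      COALESCE(ndv_compute_bit_vector(sketch), 0),   -- NDV decoded from the HLL bit vector
      sketch                                         -- raw binary sketch, kept for merging
    FROM partial_stats;                              -- hypothetical intermediate relation

The COALESCE defaults matter for empty or all-NULL groups, where max, avg, and the decoded NDV would otherwise come back NULL.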
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out
index da6e057636..019586cf5e 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out
@@ -62,19 +62,19 @@ STAGE PLANS:
                   outputColumnNames: col1, col2
                   Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: compute_stats(col1, 'hll')
+                    aggregations: max(length(col1)), avg(length(col1)), count(CASE WHEN (col1 is null) THEN (1) ELSE (null) END), compute_bit_vector(col1, 'hll')
                     keys: col2 (type: string)
                     minReductionHashAggr: 0.5090909
                     mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col1 (type: struct)
+                      Statistics: Num rows: 27 Data size: 8613 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: binary)
                     Reduce Output Operator
                       key expressions: _col1 (type: string)
                       null sort order: a
@@ -88,18 +88,18 @@
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: compute_stats(VALUE._col0)
+                aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 27 Data size: 6777 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  expressions: _col1 (type: struct), _col0 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                  expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), _col3 (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 27 Data size: 14229 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 27 Data size: 9531 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
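The materialized_view_rewrite_window.q.out hunks that follow show the numeric flavor of the same rewrite: for decimal and bigint columns the length-based aggregates are replaced by min/max range stats, and the projected type tags become 'DECIMAL' and 'LONG' instead of 'STRING'. A minimal sketch, assuming a hypothetical table t(d decimal(12,1)):

    SELECT
      min(d), max(d),                                  -- range stats instead of lengths
      count(CASE WHEN d IS NULL THEN 1 ELSE null END), -- numNulls
      compute_bit_vector(d, 'hll')                     -- same HLL sketch as for strings
    FROM t;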
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out
index 26e3856761..429ef66a55 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out
@@ -328,31 +328,35 @@ STAGE PLANS:
                   outputColumnNames: quartile, total
                   Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll')
+                    aggregations: min(quartile), max(quartile), count(CASE WHEN (quartile is null) THEN (1) ELSE (null) END), compute_bit_vector(quartile, 'hll'), min(total), max(total), count(CASE WHEN (total is null) THEN (1) ELSE (null) END), compute_bit_vector(total, 'hll')
                     minReductionHashAggr: 0.5
                     mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                    Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       null sort order:
                       sort order:
-                      Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: struct), _col1 (type: struct)
+                      Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), _col3 (type: binary), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary)
         Reducer 4
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: 'DECIMAL' (type: string), _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                  Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 6
             Execution mode: vectorized, llap
             Reduce Operator Tree:
@@ -635,31 +639,35 @@ STAGE PLANS:
                   outputColumnNames: quartile, total
                   Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll')
+                    aggregations: min(quartile), max(quartile), count(CASE WHEN (quartile is null) THEN (1) ELSE (null) END), compute_bit_vector(quartile, 'hll'), min(total), max(total), count(CASE WHEN (total is null) THEN (1) ELSE (null) END), compute_bit_vector(total, 'hll')
                     minReductionHashAggr: 0.5
                     mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                    Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       null sort order:
                       sort order:
-                      Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: struct), _col1 (type: struct)
+                      Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), _col3 (type: binary), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: binary)
         Reducer 4
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: 'DECIMAL' (type: string), _col0 (type: decimal(12,1)), _col1 (type: decimal(12,1)), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'LONG' (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                  Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 6
             Execution mode: vectorized, llap
             Reduce Operator Tree:
@@ -965,31 +973,35 @@ STAGE PLANS:
                   outputColumnNames: total_views, quartile, program
                   Statistics: Num rows: 6 Data size: 1266 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
-                    aggregations: compute_stats(total_views, 'hll'), compute_stats(quartile, 'hll'), compute_stats(program, 'hll')
+                    aggregations: min(total_views), max(total_views), count(CASE WHEN (total_views is null) THEN (1) ELSE (null) END), compute_bit_vector(total_views, 'hll'), min(quartile), max(quartile), count(CASE WHEN (quartile is null) THEN (1) ELSE (null) END), compute_bit_vector(quartile, 'hll'), max(length(program)), avg(length(program)), count(CASE WHEN (program is null) THEN (1) ELSE (null) END), compute_bit_vector(program, 'hll')
                     minReductionHashAggr: 0.8333333
                     mode: hash
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                    Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       null sort order:
                       sort order:
-                      Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                      Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: binary), _col4 (type: decimal(12,1)), _col5 (type: decimal(12,1)), _col6 (type: bigint), _col7 (type: binary), _col8 (type: int), _col9 (type: struct), _col10 (type: bigint), _col11 (type: binary)
         Reducer 3
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), compute_bit_vector(VALUE._col3), min(VALUE._col4), max(VALUE._col5), count(VALUE._col6), compute_bit_vector(VALUE._col7), max(VALUE._col8), avg(VALUE._col9), count(VALUE._col10), compute_bit_vector(VALUE._col11)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                Statistics: Num rows: 1 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: 'LONG' (type: string), _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), COALESCE(ndv_compute_bit_vector(_col3),0) (type: bigint), _col3 (type: binary), 'DECIMAL' (type: string), _col4 (type: decimal(12,1)), _col5 (type: decimal(12,1)), _col6 (type: bigint), COALESCE(ndv_compute_bit_vector(_col7),0) (type: bigint), _col7 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col8,0)) (type: bigint), COALESCE(_col9,0) (type: double), _col10 (type: bigint), COALESCE(ndv_compute_bit_vector(_col11),0) (type: bigint), _col11 (type: binary)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+                  Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-2
     Dependency Collection
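Taken together, the golden-file churn is mechanical: each compute_stats(col, 'hll') call expands into three or four standard aggregates plus compute_bit_vector, and a new reducer-side Select Operator decodes the sketch before the File Output Operator. Both new functions appear by name throughout the plans, so the pairing can be exercised in isolation; the probe below is an illustrative assumption, not a test from the patch:

    -- NDV estimate for one column via the new function pair (hypothetical table t).
    SELECT COALESCE(ndv_compute_bit_vector(compute_bit_vector(s, 'hll')), 0) AS ndv
    FROM t;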