diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 8ae1c73..7eb5345 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -444,12 +444,8 @@
   COLUMNSTATSCOLLECTOR_INVALID_PART_KEY(30005, "Invalid partitioning key specified in ANALYZE " +
       "statement"),
-  COLUMNSTATSCOLLECTOR_INCORRECT_NUM_PART_KEY(30006, "Incorrect number of partitioning key " +
-      "specified in ANALYZE statement"),
   COLUMNSTATSCOLLECTOR_INVALID_PARTITION(30007, "Invalid partitioning key/value specified in " +
       "ANALYZE statement"),
-  COLUMNSTATSCOLLECTOR_INVALID_SYNTAX(30008, "Dynamic partitioning is not supported yet while " +
-      "gathering column statistics through ANALYZE statement"),
   COLUMNSTATSCOLLECTOR_PARSE_ERROR(30009, "Encountered parse error while parsing rewritten query"),
   COLUMNSTATSCOLLECTOR_IO_ERROR(30010, "Encountered I/O exception while parsing rewritten query"),
   DROP_COMMAND_NOT_ALLOWED_FOR_PARTITION(30011, "Partition protected from being dropped"),
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index 47a6871..d504b3c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -28,6 +28,7 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -37,12 +38,14 @@
 import org.apache.hadoop.hive.metastore.api.Decimal;
 import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
-import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -65,8 +68,6 @@
 public class ColumnStatsTask extends Task implements Serializable {
   private static final long serialVersionUID = 1L;
   private FetchOperator ftOp;
-  private int totalRows;
-  private int numRows = 0;
   private static transient final Log LOG = LogFactory.getLog(ColumnStatsTask.class);

   public ColumnStatsTask() {
@@ -262,11 +263,7 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
     }
   }

-  private ColumnStatistics constructColumnStatsFromPackedRow(ObjectInspector oi,
-      Object o) throws HiveException {
-    if (oi.getCategory() != ObjectInspector.Category.STRUCT) {
-      throw new HiveException("Unexpected object type encountered while unpacking row");
-    }
+  private List constructColumnStatsFromPackedRows() throws HiveException, MetaException, IOException {
     String dbName = SessionState.get().getCurrentDatabase();
     String tableName = work.getColStats().getTableName();
@@ -275,33 +272,51 @@ private ColumnStatistics constructColumnStatsFromPackedRow(ObjectInspector oi,
     List colType = work.getColStats().getColType();
     boolean isTblLevel = work.getColStats().isTblLevel();

-    if (!isTblLevel) {
-      partName = work.getColStats().getPartName();
-    }
+    List stats = new ArrayList();
+    InspectableObject packedRow;
+    while ((packedRow = ftOp.getNextRow()) != null) {
+      if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
+        throw new HiveException("Unexpected object type encountered while unpacking row");
+      }
-    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, partName, isTblLevel);
+      List statsObjs = new ArrayList();
+      StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
+      List fields = soi.getAllStructFieldRefs();
+      List list = soi.getStructFieldsDataAsList(packedRow.o);
+
+      Table tbl = db.getTable(dbName,tableName);
+      List partColSchema = tbl.getPartCols();
+      // Partition columns are appended at end, we only care about stats column
+      for (int i = 0; i < fields.size() - partColSchema.size(); i++) {
+        // Get the field objectInspector, fieldName and the field object.
+        ObjectInspector foi = fields.get(i).getFieldObjectInspector();
+        Object f = (list == null ? null : list.get(i));
+        String fieldName = fields.get(i).getFieldName();
+        ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
+        statsObj.setColName(colName.get(i));
+        statsObj.setColType(colType.get(i));
+        unpackStructObject(foi, f, fieldName, statsObj);
+        statsObjs.add(statsObj);
+      }
-    List statsObjs = new ArrayList();
-    StructObjectInspector soi = (StructObjectInspector) oi;
-    List fields = soi.getAllStructFieldRefs();
-    List list = soi.getStructFieldsDataAsList(o);
+      if (!isTblLevel) {
+        List partVals = new ArrayList();
+        // Iterate over partition columns to figure out partition name
+        for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
+          partVals.add(((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
+              getPrimitiveJavaObject(list.get(i)).toString());
+        }
+        partName = Warehouse.makePartName(partColSchema, partVals);
+      }
-    for (int i = 0; i < fields.size(); i++) {
-      // Get the field objectInspector, fieldName and the field object.
-      ObjectInspector foi = fields.get(i).getFieldObjectInspector();
-      Object f = (list == null ? null : list.get(i));
-      String fieldName = fields.get(i).getFieldName();
-      ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
-      statsObj.setColName(colName.get(i));
-      statsObj.setColType(colType.get(i));
-      unpackStructObject(foi, f, fieldName, statsObj);
-      statsObjs.add(statsObj);
+      ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, partName, isTblLevel);
+      ColumnStatistics colStats = new ColumnStatistics();
+      colStats.setStatsDesc(statsDesc);
+      colStats.setStatsObj(statsObjs);
+      stats.add(colStats);
     }
-
-    ColumnStatistics colStats = new ColumnStatistics();
-    colStats.setStatsDesc(statsDesc);
-    colStats.setStatsObj(statsObjs);
-    return colStats;
+    ftOp.clearFetchContext();
+    return stats;
   }

   private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName,
@@ -320,53 +335,24 @@ private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName,
     return statsDesc;
   }

-  private int persistPartitionStats() throws HiveException {
-    InspectableObject io = null;
-    // Fetch result of the analyze table .. compute statistics for columns ..
-    try {
-      io = fetchColumnStats();
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (CommandNeedRetryException e) {
-      e.printStackTrace();
-    }
-
-    if (io != null) {
-      // Construct a column statistics object from the result
-      ColumnStatistics colStats = constructColumnStatsFromPackedRow(io.oi, io.o);
+  private int persistPartitionStats() throws HiveException, MetaException, IOException {
-      // Persist the column statistics object to the metastore
-      try {
-        db.updatePartitionColumnStatistics(colStats);
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
+    // Fetch result of the analyze table partition (p1=c1).. compute statistics for columns ..
+    // Construct a column statistics object from the result
+    List colStats = constructColumnStatsFromPackedRows();
+    // Persist the column statistics object to the metastore
+    for (ColumnStatistics colStat : colStats) {
+      db.updatePartitionColumnStatistics(colStat);
     }
     return 0;
   }

-  private int persistTableStats() throws HiveException {
-    InspectableObject io = null;
+  private int persistTableStats() throws HiveException, MetaException, IOException {
     // Fetch result of the analyze table .. compute statistics for columns ..
-    try {
-      io = fetchColumnStats();
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (CommandNeedRetryException e) {
-      e.printStackTrace();
-    }
-
-    if (io != null) {
-      // Construct a column statistics object from the result
-      ColumnStatistics colStats = constructColumnStatsFromPackedRow(io.oi, io.o);
-
-      // Persist the column statistics object to the metastore
-      try {
-        db.updateTableColumnStatistics(colStats);
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
-    }
+    // Construct a column statistics object from the result
+    ColumnStatistics colStats = constructColumnStatsFromPackedRows().get(0);
+    // Persist the column statistics object to the metastore
+    db.updateTableColumnStatistics(colStats);
     return 0;
   }

@@ -379,42 +365,11 @@ public int execute(DriverContext driverContext) {
        return persistPartitionStats();
      }
    } catch (Exception e) {
-      e.printStackTrace();
+      LOG.info(e);
    }
    return 1;
  }

-  private InspectableObject fetchColumnStats() throws IOException, CommandNeedRetryException {
-    InspectableObject io = null;
-
-    try {
-      int rowsRet = work.getLeastNumRows();
-      if (rowsRet <= 0) {
-        rowsRet = ColumnStatsWork.getLimit() >= 0 ?
-            Math.min(ColumnStatsWork.getLimit() - totalRows, 1) : 1;
-      }
-      if (rowsRet <= 0) {
-        ftOp.clearFetchContext();
-        return null;
-      }
-      while (numRows < rowsRet) {
-        if ((io = ftOp.getNextRow()) == null) {
-          if (work.getLeastNumRows() > 0) {
-            throw new CommandNeedRetryException();
-          }
-        }
-        numRows++;
-      }
-      return io;
-    } catch (CommandNeedRetryException e) {
-      throw e;
-    } catch (IOException e) {
-      throw e;
-    } catch (Exception e) {
-      throw new IOException(e);
-    }
-  }
-
   @Override
   public StageType getType() {
     return StageType.COLUMNSTATS;
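[Editor's note] The reworked ColumnStatsTask above fetches every packed row produced by the rewritten query, treats the trailing fields as partition columns, and derives the partition name with Warehouse.makePartName(...) before persisting one ColumnStatistics object per partition. The standalone Java sketch below is not part of the patch; it only illustrates what that call produces, assuming the stock metastore Warehouse API used above and a partition schema mirroring the Employee_Part test table added later in this patch.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class PartNameSketch {
  public static void main(String[] args) throws MetaException {
    // Hypothetical partition schema, matching (employeeSalary double, country string).
    List<FieldSchema> partCols = Arrays.asList(
        new FieldSchema("employeesalary", "double", null),
        new FieldSchema("country", "string", null));
    // Values as they would be unpacked from the trailing fields of one packed row.
    List<String> partVals = Arrays.asList("4000.0", "USA");
    // Prints "employeesalary=4000.0/country=USA" -- the partition name under which
    // that row's column statistics are persisted to the metastore.
    System.out.println(Warehouse.makePartName(partCols, partVals));
  }
}

Grouping the fetched rows by their trailing partition values is what lets a single ANALYZE statement populate statistics for many partitions at once.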
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index 1270520..6c9876d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -19,8 +19,7 @@
 package org.apache.hadoop.hive.ql.parse;

 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
+import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -33,7 +32,6 @@
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;

 /**
@@ -56,50 +54,8 @@
   private boolean isTableLevel;
   private List colNames;
   private List colType;
-  private String partName;
   private Table tbl;

-  private class PartitionList {
-    private final String[] partKeys;
-    private final String[] partValues;
-    private final int numPartitions;
-    private int numPartitionValues;
-
-    PartitionList(int numPartitions) {
-      this.numPartitions = numPartitions;
-      partKeys = new String[numPartitions];
-      partValues = new String[numPartitions];
-    }
-
-    public int getNumPartitions() {
-      return numPartitions;
-    }
-
-    public String[] getPartValues() {
-      return partValues;
-    }
-
-    public String[] getPartKeys() {
-      return partKeys;
-    }
-
-    public void addPartValue(String partValue, int index) {
-      partValues[index] = new String(partValue);
-    }
-
-    public void addPartKey(String partKey, int index) {
-      partKeys[index] = new String(partKey);
-    }
-
-    public int getNumPartValues() {
-      return numPartitionValues;
-    }
-
-    public void setNumPartValues(int numPartValues) {
-      numPartitionValues = numPartValues;
-    }
-  }
-
   public ColumnStatsSemanticAnalyzer(HiveConf conf) throws SemanticException {
     super(conf);
   }
@@ -143,29 +99,22 @@ private Table getTable(ASTNode tree) throws SemanticException {
     }
   }

-  private PartitionList getPartKeyValuePairsFromAST(ASTNode tree) {
+  private Map getPartKeyValuePairsFromAST(ASTNode tree) {
     ASTNode child = ((ASTNode) tree.getChild(0).getChild(1));
-    int numParts = child.getChildCount();
-    PartitionList partList = new PartitionList(numParts);
+    Map partSpec = new HashMap();
     String partKey;
     String partValue;
-    int numPartValue = 0;
-    for (int i = 0; i < numParts; i++) {
-      partKey = new String(getUnescapedName((ASTNode) child.getChild(i).getChild(0)));
+    for (int i = 0; i < child.getChildCount(); i++) {
+      partKey = new String(getUnescapedName((ASTNode) child.getChild(i).getChild(0))).toLowerCase();
       if (child.getChild(i).getChildCount() > 1) {
         partValue = new String(getUnescapedName((ASTNode) child.getChild(i).getChild(1)));
         partValue = partValue.replaceAll("'", "");
-        numPartValue += 1;
       } else {
         partValue = null;
       }
-      partList.addPartKey(partKey, i);
-      if (partValue != null) {
-        partList.addPartValue(partValue, i);
-      }
+      partSpec.put(partKey, partValue);
     }
-    partList.setNumPartValues(numPartValue);
-    return partList;
+    return partSpec;
   }

   private List getColumnName(ASTNode tree) throws SemanticException{
@@ -186,133 +135,85 @@ private PartitionList getPartKeyValuePairsFromAST(ASTNode tree) {
     }
   }

-  private void validatePartitionKeys(PartitionList partList) throws
+  private void handlePartialPartitionSpec(Map partSpec) throws
     SemanticException {
-    List partKeys = tbl.getPartitionKeys();
-    String[] inputPartKeys = partList.getPartKeys();
-
-    if (inputPartKeys.length != partKeys.size()) {
-      throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INCORRECT_NUM_PART_KEY.getMsg());
+    // If user has fully specified partition, validate that partition exists
+    int partValsSpecified = 0;
+    for (String partKey : partSpec.keySet()) {
+      partValsSpecified += partSpec.get(partKey) == null ? 0 : 1;
     }
-
-    Map partKeysMap = new LinkedHashMap();
-    for (int i=0; i partKeys = tbl.getPartitionKeys();
-    String[] inputPartKeys = partList.getPartKeys();
-    String[] inputPartKeyTypes = new String[inputPartKeys.length];
-
-    for (int i=0; i < inputPartKeys.length; i++) {
-      for (FieldSchema partKey:partKeys) {
-        if (inputPartKeys[i].equalsIgnoreCase(partKey.getName())) {
-          inputPartKeyTypes[i] = new String(partKey.getType());
-          break;
-        }
-      }
-    }
-    return inputPartKeyTypes;
-  }
-
-  private String constructPartitionName(PartitionList partList)
-      throws SemanticException {
-    Partition part;
-    String[] partKeys = partList.getPartKeys();
-    String[] partValues = partList.getPartValues();
-    Map partSpec = new LinkedHashMap();
-
-    for (int i=0; i partKeys = Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys());
+    for (String partKey : partKeys){
+      if(!partSpec.containsKey(partKey)) {
+        partSpec.put(partKey, null);
+      }
+    }
+
+    // Check if user have erroneously specified non-existent partitioning columns
+    for (String partKey : partSpec.keySet()) {
+      if(!partKeys.contains(partKey)){
+        throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PART_KEY.getMsg() + " : " + partKey);
+      }
+    }
   }

-  private StringBuilder genPartitionClause(PartitionList partList) throws SemanticException {
+  private StringBuilder genPartitionClause(Map partSpec) throws SemanticException {
     StringBuilder whereClause = new StringBuilder(" where ");
     boolean predPresent = false;
     StringBuilder groupByClause = new StringBuilder(" group by ");
     boolean aggPresent = false;
-    StringBuilder retClause = null;
-    String[] partKeys = partList.getPartKeys();
-    String[] partValues = partList.getPartValues();
-    String[] partKeysType = getPartitionKeysType(partList);
-    for (int i = 0; i < partList.getNumPartitions(); i++) {
-      if (partValues[i] != null) {
+    for (String partKey : partSpec.keySet()) {
+      String value;
+      if ((value = partSpec.get(partKey)) != null) {
         if (!predPresent) {
-          whereClause.append(partKeys[i]);
-          whereClause.append(" = ");
-          if (partKeysType[i].equalsIgnoreCase("string")) {
-            whereClause.append("'");
-          }
-          whereClause.append(partValues[i]);
-          if (partKeysType[i].equalsIgnoreCase("string")) {
-            whereClause.append("'");
-          }
           predPresent = true;
         } else {
           whereClause.append(" and ");
-          whereClause.append(partKeys[i]);
-          whereClause.append(" = ");
-          if (partKeysType[i].equalsIgnoreCase("string")) {
-            whereClause.append("'");
-          }
-          whereClause.append(partValues[i]);
-          if (partKeysType[i].equalsIgnoreCase("string")) {
-            whereClause.append("'");
-          }
         }
-      } else {
+        whereClause.append(partKey);
+        whereClause.append(" = ");
+        if (getColTypeOf(partKey).equalsIgnoreCase("string")) {
+          whereClause.append("'");
+        }
+        whereClause.append(value);
+        if (getColTypeOf(partKey).equalsIgnoreCase("string")) {
+          whereClause.append("'");
+        }
+      }
+    }
+
+    for (FieldSchema fs : tbl.getPartitionKeys()) {
       if (!aggPresent) {
-        groupByClause.append(partKeys[i]);
         aggPresent = true;
       } else {
         groupByClause.append(",");
-        groupByClause.append(partKeys[i]);
       }
-    }
+      groupByClause.append(fs.getName());
     }

     // attach the predicate and group by to the return clause
-    if (predPresent) {
-      retClause = new StringBuilder(whereClause);
-    }
-    if (aggPresent) {
-      retClause.append(groupByClause);
+    return predPresent ? whereClause.append(groupByClause) : groupByClause;
+  }
+
+  private String getColTypeOf (String partKey) throws SemanticException{
+
+    for (FieldSchema fs : tbl.getPartitionKeys()) {
+      if (partKey.equalsIgnoreCase(fs.getName())) {
+        return fs.getType();
+      }
     }
-    return retClause;
+    throw new SemanticException ("Unknown partition key : " + partKey);
   }

   private int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException {
@@ -369,7 +270,7 @@ private int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticExcep
     return numBitVectors;
   }

-  private List getTableColumnType(List colNames)
+  private List getColumnTypes(List colNames)
       throws SemanticException{
     List colTypes = new LinkedList();
     List cols = tbl.getCols();
@@ -384,35 +285,11 @@ private int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticExcep
     return colTypes;
   }

-  private List getPartitionColumnType(String partName,
-      List colNames) throws SemanticException {
-    List colTypes = new LinkedList();
-    List partNames = new ArrayList();
-    partNames.add(partName);
-    List partitionList;
-
-    try {
-      partitionList = db.getPartitionsByNames(tbl, partNames);
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(partName));
-    }
-    Partition part = partitionList.get(0);
-    List cols = part.getCols();
-
-    for (String colName : colNames) {
-      for (FieldSchema col: cols) {
-        if (colName.equalsIgnoreCase(col.getName())) {
-          colTypes.add(new String(col.getType()));
-        }
-      }
-    }
-    return colTypes;
-  }
-
-  private String genRewrittenQuery(List colNames, int numBitVectors, PartitionList partList,
+  private String genRewrittenQuery(List colNames, int numBitVectors, Map partSpec,
       boolean isPartitionStats) throws SemanticException{
     StringBuilder rewrittenQueryBuilder = new StringBuilder("select ");
     String rewrittenQuery;
+
     for (int i = 0; i < colNames.size(); i++) {
       if (i > 0) {
         rewrittenQueryBuilder.append(" , ");
       }
@@ -423,6 +300,9 @@ private String genRewrittenQuery(List colNames, int numBitVectors, Parti
       rewrittenQueryBuilder.append(numBitVectors);
       rewrittenQueryBuilder.append(" )");
     }
+    for (FieldSchema fs : tbl.getPartCols()) {
+      rewrittenQueryBuilder.append(" , " + fs.getName());
+    }
     rewrittenQueryBuilder.append(" from ");
     rewrittenQueryBuilder.append(tbl.getTableName());
     isRewritten = true;
@@ -430,7 +310,7 @@ private String genRewrittenQuery(List colNames, int numBitVectors, Parti
     // If partition level statistics is requested, add predicate and group by as needed to rewritten
     // query
     if (isPartitionStats) {
-      rewrittenQueryBuilder.append(genPartitionClause(partList));
+      rewrittenQueryBuilder.append(genPartitionClause(partSpec));
     }

     rewrittenQuery = rewrittenQueryBuilder.toString();
@@ -474,23 +354,20 @@ public ColumnStatsSemanticAnalyzer(HiveConf conf, ASTNode tree) throws SemanticE
       // Save away the original AST
       originalTree = tree;
       boolean isPartitionStats = isPartitionLevelStats(tree);
-      PartitionList partList = null;
+      Map partSpec = null;
       checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
       validateSpecifiedColumnNames(colNames);

       if (isPartitionStats) {
         isTableLevel = false;
-        partList = getPartKeyValuePairsFromAST(tree);
-        validatePartitionClause(partList);
-        partName = constructPartitionName(partList);
-        colType = getPartitionColumnType(partName, colNames);
+        partSpec = getPartKeyValuePairsFromAST(tree);
+        handlePartialPartitionSpec(partSpec);
       } else {
         isTableLevel = true;
-        colType = getTableColumnType(colNames);
       }
-
+      colType = getColumnTypes(colNames);
       int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
-      rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partList, isPartitionStats);
+      rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats);
       rewrittenTree = genRewrittenTree(rewrittenQuery);
     } else {
       // Not an analyze table column compute statistics statement - don't do any rewrites
@@ -540,10 +417,6 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
     qbp = qb.getParseInfo();
     qbp.setTableName(tbl.getTableName());
     qbp.setTblLvl(isTableLevel);
-
-    if (!isTableLevel) {
-      qbp.setPartName(partName);
-    }
     qbp.setColName(colNames);
     qbp.setColType(colType);
     initCtx(ctx);
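[Editor's note] The analyzer changes above replace the old PartitionList plumbing with a plain partition-spec map: keys the user left unspecified are filled with null, unknown keys are rejected, the partition columns are appended to the select list, a WHERE clause is emitted only for the pinned keys, and a GROUP BY is emitted over all partition keys. The sketch below is not part of the patch; it only approximates the rewritten query for ANALYZE TABLE Employee_Part PARTITION (employeeSalary='4000.0', country) COMPUTE STATISTICS FOR COLUMNS employeeName, employeeID. The authoritative logic is genRewrittenQuery/genPartitionClause above, and 16 merely stands in for the configured number of bit vectors.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class RewriteSketch {
  public static void main(String[] args) {
    List<String> statsCols = Arrays.asList("employeename", "employeeid");
    // Partial spec: keys the user did not pin carry a null value,
    // the shape handlePartialPartitionSpec() leaves behind.
    Map<String, String> partSpec = new LinkedHashMap<String, String>();
    partSpec.put("employeesalary", "4000.0");
    partSpec.put("country", null);

    StringBuilder query = new StringBuilder("select ");
    for (int i = 0; i < statsCols.size(); i++) {
      if (i > 0) {
        query.append(" , ");
      }
      query.append("compute_stats(").append(statsCols.get(i)).append(", 16)");
    }
    // Partition columns ride along so ColumnStatsTask can recover each partition name.
    for (String partKey : partSpec.keySet()) {
      query.append(" , ").append(partKey);
    }
    query.append(" from Employee_Part");

    // WHERE only for the pinned keys (a double value needs no quotes);
    // GROUP BY over every partition key.
    StringBuilder where = new StringBuilder();
    for (Map.Entry<String, String> e : partSpec.entrySet()) {
      if (e.getValue() == null) {
        continue;
      }
      where.append(where.length() == 0 ? " where " : " and ")
          .append(e.getKey()).append(" = ").append(e.getValue());
    }
    query.append(where).append(" group by employeesalary, country");

    // select compute_stats(employeename, 16) , compute_stats(employeeid, 16) ,
    //   employeesalary , country from Employee_Part
    //   where employeesalary = 4000.0 group by employeesalary, country
    System.out.println(query);
  }
}

One row then comes back per (employeeSalary, country) group, which matches the per-partition Group By Operator visible in the new explain output further down.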
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index a4ba4bd..911ac8a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -65,7 +65,6 @@
   private String tableName; // used for column statistics
   private List colName; // used for column statistics
   private List colType; // used for column statistics
-  private String partName; // used for column statistics
   private boolean isTblLvl; // used for column statistics

@@ -582,14 +581,6 @@ public void setColName(List colName) {
     this.colName = colName;
   }

-  public String getPartName() {
-    return partName;
-  }
-
-  public void setPartName(String partName) {
-    this.partName = partName;
-  }
-
   public boolean isTblLvl() {
     return isTblLvl;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index b75f78c..b9890af 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -293,7 +293,6 @@ protected void genColumnStatsTask(QB qb, List loadTableWork,
     ColumnStatsWork cStatsWork = null;
     FetchWork fetch = null;
     String tableName = qbParseInfo.getTableName();
-    String partName = qbParseInfo.getPartName();
     List colName = qbParseInfo.getColName();
     List colType = qbParseInfo.getColType();
     boolean isTblLevel = qbParseInfo.isTblLvl();
@@ -307,7 +306,7 @@ protected void genColumnStatsTask(QB qb, List loadTableWork,
     fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab,
         qb.getParseInfo().getOuterQueryLimit());

-    ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, partName,
+    ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName,
        colName, colType, isTblLevel);
     cStatsWork = new ColumnStatsWork(fetch, cStatsDesc);
     cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf);
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
index 86e6877..a44c8e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java @@ -28,19 +28,17 @@ private static final long serialVersionUID = 1L; private boolean isTblLevel; private String tableName; - private String partName; private List colName; private List colType; public ColumnStatsDesc() { } - public ColumnStatsDesc(String tableName, String partName, List colName, + public ColumnStatsDesc(String tableName, List colName, List colType, boolean isTblLevel) { this.tableName = tableName; this.colName = colName; this.colType = colType; - this.partName = partName; this.isTblLevel = isTblLevel; } @@ -62,15 +60,6 @@ public void setTblLevel(boolean isTblLevel) { this.isTblLevel = isTblLevel; } - @Explain(displayName = "Partition") - public String getPartName() { - return partName; - } - - public void setPartName(String partName) { - this.partName = partName; - } - @Explain(displayName = "Columns") public List getColName() { return colName; diff --git ql/src/test/queries/clientnegative/columnstats_partlvl_dp.q ql/src/test/queries/clientnegative/columnstats_partlvl_dp.q deleted file mode 100644 index b4887c4..0000000 --- ql/src/test/queries/clientnegative/columnstats_partlvl_dp.q +++ /dev/null @@ -1,16 +0,0 @@ -DROP TABLE Employee_Part; - -CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile; - -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK'); - --- dynamic partitioning syntax -explain -analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID; -analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID; diff --git ql/src/test/queries/clientnegative/columnstats_partlvl_incorrect_num_keys.q ql/src/test/queries/clientnegative/columnstats_partlvl_incorrect_num_keys.q deleted file mode 100644 index 2f8e927..0000000 --- ql/src/test/queries/clientnegative/columnstats_partlvl_incorrect_num_keys.q +++ /dev/null @@ -1,16 +0,0 @@ -DROP TABLE Employee_Part; - -CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile; - -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', 
country='USA'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK'); -LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK'); - --- don't specify all partitioning keys -explain -analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID; -analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID; diff --git ql/src/test/queries/clientpositive/columnstats_partlvl_dp.q ql/src/test/queries/clientpositive/columnstats_partlvl_dp.q new file mode 100644 index 0000000..73739b3 --- /dev/null +++ ql/src/test/queries/clientpositive/columnstats_partlvl_dp.q @@ -0,0 +1,69 @@ +DROP TABLE Employee_Part; + +CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK'); +LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK'); + +-- dynamic partitioning syntax +explain +analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID; +analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID; + +describe formatted Employee_Part.employeeName partition (employeeSalary='4000.0', country='USA'); + +-- don't specify all partitioning keys +explain +analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID; +analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID; + +describe formatted Employee_Part.employeeID partition (employeeSalary='2000.0', country='USA'); +describe formatted Employee_Part.employeeID partition (employeeSalary='2000.0', country='UK'); +-- don't specify any partitioning keys +explain +analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID; +analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID; + +describe formatted Employee_Part.employeeID partition (employeeSalary='3000.0', country='UK'); +explain +analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns; +analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns; + +describe formatted Employee_Part.employeeName partition (employeeSalary='3500.0', country='UK'); + +-- partially populated stats +drop table Employee; +CREATE TABLE 
Employee(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee partition(employeeSalary='2000.0', country='USA'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='2000.0', country='UK'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3500.0', country='UK'); +LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='UK'); + +analyze table Employee partition (employeeSalary,country) compute statistics for columns; + +describe formatted Employee.employeeName partition (employeeSalary='3500.0', country='UK'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA'); +LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='4000.0', country='USA'); + +analyze table Employee partition (employeeSalary) compute statistics for columns; + +describe formatted Employee.employeeName partition (employeeSalary='3000.0', country='USA'); + +-- add columns +alter table Employee add columns (c int ,d string); + +LOAD DATA LOCAL INPATH "../../data/files/employee_part.txt" INTO TABLE Employee partition(employeeSalary='6000.0', country='UK'); + +analyze table Employee partition (employeeSalary='6000.0',country='UK') compute statistics for columns; + +describe formatted Employee.employeeName partition (employeeSalary='6000.0', country='UK'); +describe formatted Employee.c partition (employeeSalary='6000.0', country='UK'); +describe formatted Employee.d partition (employeeSalary='6000.0', country='UK'); + diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out index d48d8cb..e3855d6 100644 --- ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out +++ ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out @@ -65,4 +65,4 @@ POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@employee_part POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK -FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement +FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {employeesalary=4000.0, country=Canada} diff --git ql/src/test/results/clientpositive/columnstats_partlvl.q.out ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 6128770..43d8737 100644 --- ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -46,23 +46,27 @@ STAGE PLANS: TableScan alias: employee_part Select Operator - expressions: employeeid (type: int) - outputColumnNames: employeeid + expressions: employeesalary (type: double), employeeid (type: int) + outputColumnNames: employeesalary, employeeid Group By Operator aggregations: compute_stats(employeeid, 16) + keys: employeesalary (type: double) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Reduce Output Operator - sort order: - value expressions: _col0 (type: struct) + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition 
columns: _col0 (type: double) + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 + expressions: _col1 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1 File Output Operator compressed: false table: @@ -75,7 +79,6 @@ STAGE PLANS: Column Stats Desc: Columns: employeeID Column Types: int - Partition: employeesalary=2000.0 Table: employee_part PREHOOK: query: explain extended @@ -111,16 +114,19 @@ STAGE PLANS: alias: employee_part GatherStats: false Select Operator - expressions: employeeid (type: int) - outputColumnNames: employeeid + expressions: employeesalary (type: double), employeeid (type: int) + outputColumnNames: employeesalary, employeeid Group By Operator aggregations: compute_stats(employeeid, 16) + keys: employeesalary (type: double) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Reduce Output Operator - sort order: + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col1 (type: struct) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -177,11 +183,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 + expressions: _col1 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 @@ -192,8 +199,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0 - columns.types struct + columns _col0,_col1 + columns.types struct:double escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 @@ -208,7 +215,6 @@ STAGE PLANS: Column Stats Desc: Columns: employeeID Column Types: int - Partition: employeesalary=2000.0 Table: employee_part Is Table Level Stats: false @@ -239,23 +245,27 @@ STAGE PLANS: TableScan alias: employee_part Select Operator - expressions: employeeid (type: int) - outputColumnNames: employeeid + expressions: employeesalary (type: double), employeeid (type: int) + outputColumnNames: employeesalary, employeeid Group By Operator aggregations: compute_stats(employeeid, 16) + keys: employeesalary (type: double) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Reduce Output Operator - sort order: - value expressions: _col0 (type: struct) + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 + expressions: _col1 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1 File Output Operator compressed: false table: @@ -268,7 +278,6 @@ STAGE PLANS: Column Stats Desc: Columns: employeeID 
Column Types: int - Partition: employeesalary=4000.0 Table: employee_part PREHOOK: query: explain extended @@ -304,16 +313,19 @@ STAGE PLANS: alias: employee_part GatherStats: false Select Operator - expressions: employeeid (type: int) - outputColumnNames: employeeid + expressions: employeesalary (type: double), employeeid (type: int) + outputColumnNames: employeesalary, employeeid Group By Operator aggregations: compute_stats(employeeid, 16) + keys: employeesalary (type: double) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Reduce Output Operator - sort order: + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) tag: -1 - value expressions: _col0 (type: struct) + value expressions: _col1 (type: struct) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -370,11 +382,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 + expressions: _col1 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 @@ -385,8 +398,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0 - columns.types struct + columns _col0,_col1 + columns.types struct:double escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 @@ -401,7 +414,6 @@ STAGE PLANS: Column Stats Desc: Columns: employeeID Column Types: int - Partition: employeesalary=4000.0 Table: employee_part Is Table Level Stats: false @@ -432,23 +444,27 @@ STAGE PLANS: TableScan alias: employee_part Select Operator - expressions: employeeid (type: int), employeename (type: string) - outputColumnNames: employeeid, employeename + expressions: employeesalary (type: double), employeeid (type: int), employeename (type: string) + outputColumnNames: employeesalary, employeeid, employeename Group By Operator aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) + keys: employeesalary (type: double) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - sort order: - value expressions: _col0 (type: struct), _col1 (type: struct) + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Select Operator - expressions: _col0 (type: struct), _col1 (type: struct) - outputColumnNames: _col0, _col1 + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false table: @@ -461,7 +477,6 @@ STAGE PLANS: Column Stats Desc: Columns: employeeid, employeename Column Types: int, string - Partition: employeesalary=2000.0 Table: employee_part PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns diff --git ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out 
ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out new file mode 100644 index 0000000..f2c530ce4 --- /dev/null +++ ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -0,0 +1,567 @@ +PREHOOK: query: DROP TABLE Employee_Part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE Employee_Part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@Employee_Part +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part 
partition(employeeSalary='3500.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +PREHOOK: query: -- dynamic partitioning syntax +explain +analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID +PREHOOK: type: QUERY +POSTHOOK: query: -- dynamic partitioning syntax +explain +analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + Select Operator + expressions: employeesalary (type: double), country (type: string), employeename (type: string), employeeid (type: int) + outputColumnNames: employeesalary, country, employeename, employeeid + Group By Operator + aggregations: compute_stats(employeename, 16), compute_stats(employeeid, 16) + keys: employeesalary (type: double), country (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: double), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: employeeName, employeeID + Column Types: string, int + Table: employee_part + +PREHOOK: query: analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID +PREHOOK: type: QUERY +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +#### A masked pattern was here #### +POSTHOOK: query: analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +#### A masked pattern was here #### +PREHOOK: query: describe formatted 
Employee_Part.employeeName partition (employeeSalary='4000.0', country='USA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part.employeeName partition (employeeSalary='4000.0', country='USA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +employeeName string 0 6 5.142857142857143 6 from deserializer +PREHOOK: query: -- don't specify all partitioning keys +explain +analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID +PREHOOK: type: QUERY +POSTHOOK: query: -- don't specify all partitioning keys +explain +analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + Select Operator + expressions: employeesalary (type: double), country (type: string), employeeid (type: int) + outputColumnNames: employeesalary, country, employeeid + Group By Operator + aggregations: compute_stats(employeeid, 16) + keys: employeesalary (type: double), country (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: employeeID + Column Types: int + Table: employee_part + +PREHOOK: query: analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID +PREHOOK: type: QUERY +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA +#### A masked pattern was here #### +POSTHOOK: query: analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA +#### A masked pattern was here #### +PREHOOK: query: describe formatted Employee_Part.employeeID partition (employeeSalary='2000.0', country='USA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part.employeeID partition (employeeSalary='2000.0', country='USA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment 
+
+employeeID	int	16	34	1	14			from deserializer
+PREHOOK: query: describe formatted Employee_Part.employeeID partition (employeeSalary='2000.0', country='UK')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee_part
+POSTHOOK: query: describe formatted Employee_Part.employeeID partition (employeeSalary='2000.0', country='UK')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee_part
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+employeeID	int	16	31	0	9			from deserializer
+PREHOOK: query: -- don't specify any partitioning keys
+explain
+analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID
+PREHOOK: type: QUERY
+POSTHOOK: query: -- don't specify any partitioning keys
+explain
+analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: employee_part
+            Select Operator
+              expressions: employeesalary (type: double), country (type: string), employeeid (type: int)
+              outputColumnNames: employeesalary, country, employeeid
+              Group By Operator
+                aggregations: compute_stats(employeeid, 16)
+                keys: employeesalary (type: double), country (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions: _col0 (type: double), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+                  value expressions: _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          keys: KEY._col0 (type: double), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: employeeID
+          Column Types: int
+          Table: employee_part
+
+PREHOOK: query: analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee_part
+PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK
+PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA
+PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK
+PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA
+PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK
+PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee_part
+POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK
+POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA
+POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK
+POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA
+POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK
+POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted Employee_Part.employeeID partition (employeeSalary='3000.0', country='UK')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee_part
+POSTHOOK: query: describe formatted Employee_Part.employeeID partition (employeeSalary='3000.0', country='UK')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee_part
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+employeeID	int	16	34	1	14			from deserializer
+PREHOOK: query: explain
+analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: employee_part
+            Select Operator
+              expressions: employeesalary (type: double), country (type: string), employeeid (type: int), employeename (type: string)
+              outputColumnNames: employeesalary, country, employeeid, employeename
+              Group By Operator
+                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
+                keys: employeesalary (type: double), country (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Reduce Output Operator
+                  key expressions: _col0 (type: double), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+                  value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: double), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: employeeid, employeename
+          Column Types: int, string
+          Table: employee_part
+
+PREHOOK: query: analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee_part
+PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK
+PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA
+PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK
+PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA
+PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK
+PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee_part
+POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK
+POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA
+POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK
+POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA
+POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK
+POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted Employee_Part.employeeName partition (employeeSalary='3500.0', country='UK')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee_part
+POSTHOOK: query: describe formatted Employee_Part.employeeName partition (employeeSalary='3500.0', country='UK')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee_part
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+employeeName	string			0	6	5.142857142857143	6			from deserializer
+PREHOOK: query: -- partially populated stats
+drop table Employee
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- partially populated stats
+drop table Employee
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE Employee(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE Employee(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@Employee
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee partition(employeeSalary='2000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee partition(employeeSalary='2000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee
+POSTHOOK: Output: default@employee@employeesalary=2000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='2000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='2000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee
+POSTHOOK: Output: default@employee@employeesalary=2000.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3500.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3500.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee
+POSTHOOK: Output: default@employee@employeesalary=3500.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee
+POSTHOOK: Output: default@employee@employeesalary=3000.0/country=UK
+PREHOOK: query: analyze table Employee partition (employeeSalary,country) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+PREHOOK: Input: default@employee@employeesalary=2000.0/country=UK
+PREHOOK: Input: default@employee@employeesalary=2000.0/country=USA
+PREHOOK: Input: default@employee@employeesalary=3000.0/country=UK
+PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table Employee partition (employeeSalary,country) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+POSTHOOK: Input: default@employee@employeesalary=2000.0/country=UK
+POSTHOOK: Input: default@employee@employeesalary=2000.0/country=USA
+POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK
+POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted Employee.employeeName partition (employeeSalary='3500.0', country='UK')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee
+POSTHOOK: query: describe formatted Employee.employeeName partition (employeeSalary='3500.0', country='UK')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+employeeName	string			0	6	5.142857142857143	6			from deserializer
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee
+POSTHOOK: Output: default@employee@employeesalary=3000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='4000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='4000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee
+POSTHOOK: Output: default@employee@employeesalary=4000.0/country=USA
+PREHOOK: query: analyze table Employee partition (employeeSalary) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+PREHOOK: Input: default@employee@employeesalary=2000.0/country=UK
+PREHOOK: Input: default@employee@employeesalary=2000.0/country=USA
+PREHOOK: Input: default@employee@employeesalary=3000.0/country=UK
+PREHOOK: Input: default@employee@employeesalary=3000.0/country=USA
+PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK
+PREHOOK: Input: default@employee@employeesalary=4000.0/country=USA
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table Employee partition (employeeSalary) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+POSTHOOK: Input: default@employee@employeesalary=2000.0/country=UK
+POSTHOOK: Input: default@employee@employeesalary=2000.0/country=USA
+POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK
+POSTHOOK: Input: default@employee@employeesalary=3000.0/country=USA
+POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK
+POSTHOOK: Input: default@employee@employeesalary=4000.0/country=USA
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted Employee.employeeName partition (employeeSalary='3000.0', country='USA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee
+POSTHOOK: query: describe formatted Employee.employeeName partition (employeeSalary='3000.0', country='USA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+employeeName	string			0	6	5.142857142857143	6			from deserializer
+PREHOOK: query: -- add columns
+alter table Employee add columns (c int ,d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@employee
+PREHOOK: Output: default@employee
+POSTHOOK: query: -- add columns
+alter table Employee add columns (c int ,d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@employee
+POSTHOOK: Output: default@employee
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee_part.txt" INTO TABLE Employee partition(employeeSalary='6000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee_part.txt" INTO TABLE Employee partition(employeeSalary='6000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee
+POSTHOOK: Output: default@employee@employeesalary=6000.0/country=UK
+PREHOOK: query: analyze table Employee partition (employeeSalary='6000.0',country='UK') compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@employee
+PREHOOK: Input: default@employee@employeesalary=6000.0/country=UK
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table Employee partition (employeeSalary='6000.0',country='UK') compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@employee
+POSTHOOK: Input: default@employee@employeesalary=6000.0/country=UK
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted Employee.employeeName partition (employeeSalary='6000.0', country='UK')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee
+POSTHOOK: query: describe formatted Employee.employeeName partition (employeeSalary='6000.0', country='UK')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+employeeName	string			0	9	4.777777777777778	6			from deserializer
+PREHOOK: query: describe formatted Employee.c partition (employeeSalary='6000.0', country='UK')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee
+POSTHOOK: query: describe formatted Employee.c partition (employeeSalary='6000.0', country='UK')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+c	int	2000	4000	0	4			from deserializer
+PREHOOK: query: describe formatted Employee.d partition (employeeSalary='6000.0', country='UK')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@employee
+POSTHOOK: query: describe formatted Employee.d partition (employeeSalary='6000.0', country='UK')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@employee
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+d	string			0	2	2.4444444444444446	3			from deserializer