diff --git a/data/files/extrapolate_stats_full.txt b/data/files/extrapolate_stats_full.txt
new file mode 100644
index 0000000..d7ad64e
--- /dev/null
+++ b/data/files/extrapolate_stats_full.txt
@@ -0,0 +1,6 @@
+|1|94087|2000
+O|2|94086|2000
+|1|94087|2001
+H|2|94086|2001
+|3|94086|2001
+OH|4|94086|2001
diff --git a/data/files/extrapolate_stats_partial.txt b/data/files/extrapolate_stats_partial.txt
new file mode 100644
index 0000000..fa92ed3
--- /dev/null
+++ b/data/files/extrapolate_stats_partial.txt
@@ -0,0 +1,20 @@
+|1|94087|2000
+O|2|94086|2000
+|1|94087|2001
+H|2|94086|2001
+|3|94086|2001
+OH|4|43201|2001
+oh1|1|94087|2002
+OH2|2|43201|2002
+oh3|3|94087|2002
+OH4|4|94086|2002
+oh5|4|43201|2002
+OH6|5|94087|2002
+|31|94087|2003
+OH33|1|43201|2003
+|3|94087|2003
+OH|1|94086|2003
+|4|43201|2003
+OH|1|94087|2003
+|1|43201|2003
+OH|5|94086|2003
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 9489949..06d7595 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -5024,12 +5024,8 @@ public AggrStats get_aggr_stats_for(PartitionsStatsRequest request)
       startFunction("get_aggr_stats_for: db=" + request.getDbName() + " table=" + request.getTblName());
       AggrStats aggrStats = null;
       try {
-        //TODO: We are setting partitionCnt for which we were able to retrieve stats same as
-        // incoming number from request. This is not correct, but currently no users of this api
-        // rely on this. Only, current user StatsAnnotation don't care for it. StatsOptimizer
-        // will care for it, so before StatsOptimizer begin using it, we need to fix this.
         aggrStats = new AggrStats(getMS().get_aggr_stats_for(request.getDbName(),
-            request.getTblName(), request.getPartNames(), request.getColNames()), request.getPartNames().size());
+            request.getTblName(), request.getPartNames(), request.getColNames()));
         return aggrStats;
       } finally {
        endFunction("get_partitions_statistics_req: ", aggrStats == null, null, request.getTblName());
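
With this hunk the metastore stops echoing request.getPartNames().size() back as the partition count and instead returns however many partitions actually had statistics. A caller can compare the two to detect that the aggregate may rest on extrapolated rather than fully backed column stats. A minimal sketch, assuming a connected ThriftHiveMetastore client named `client` (the table and column names are illustrative only):

    import java.util.Arrays;
    import org.apache.hadoop.hive.metastore.api.AggrStats;
    import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;

    // Ask for aggregate column stats over two partitions of a table.
    PartitionsStatsRequest req = new PartitionsStatsRequest("default", "loc_orc_1d",
        Arrays.asList("state", "locid"),            // colNames
        Arrays.asList("year=2000", "year=2001"));   // partNames
    AggrStats aggr = client.get_aggr_stats_for(req);
    if (aggr.getPartsFound() < req.getPartNames().size()) {
      // at least one requested partition had no stats; values may be extrapolated
    }
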
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java
new file mode 100644
index 0000000..74f1b01
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public interface IExtrapolatePartStatus {
+  /**
+   * The sequence of colStatNames.
+   */
+  static String[] colStatNames = new String[] { "LONG_LOW_VALUE",
+      "LONG_HIGH_VALUE", "DOUBLE_LOW_VALUE", "DOUBLE_HIGH_VALUE",
+      "BIG_DECIMAL_LOW_VALUE", "BIG_DECIMAL_HIGH_VALUE", "NUM_NULLS",
+      "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES" };
+
+  /**
+   * The indexes for colstats.
+   */
+  static HashMap<String, Integer[]> indexMaps = new HashMap<String, Integer[]>() {{
+    put("long", new Integer [] {0,1,6,7});
+    put("double", new Integer [] {2,3,6,7});
+    put("string", new Integer [] {8,9,6,7});
+    put("boolean", new Integer [] {10,11,6});
+    put("binary", new Integer [] {8,9,6});
+    put("decimal", new Integer [] {4,5,6,7});
+    put("default", new Integer [] {0,1,2,3,4,5,6,7,8,9,10,11});
+  }};
+
+  /**
+   * The sequence of colStatTypes.
+   */
+  static enum ColStatType {
+    Long, Double, Decimal
+  }
+
+  static ColStatType[] colStatTypes = new ColStatType[] { ColStatType.Long,
+      ColStatType.Long, ColStatType.Double, ColStatType.Double,
+      ColStatType.Decimal, ColStatType.Decimal, ColStatType.Long,
+      ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long,
+      ColStatType.Long };
+
+  /**
+   * The sequence of aggregation function on colStats.
+   */
+  static enum AggrType {
+    Min, Max, Sum
+  }
+
+  static AggrType[] aggrTypes = new AggrType[] { AggrType.Min, AggrType.Max,
+      AggrType.Min, AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Sum,
+      AggrType.Max, AggrType.Max, AggrType.Max, AggrType.Sum, AggrType.Sum };
+
+  public Object extrapolate(Object[] min, Object[] max, int colStatIndex,
+      Map<String, Integer> indexMap);
+
+}
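
The three parallel arrays plus indexMaps form a small lookup table: for a given column type, indexMaps selects which slots of colStatNames apply, and aggrTypes/colStatTypes say how each slot is aggregated and typed. A throwaway sketch (not part of the patch) that dumps the plan for a "long" column:

    // Illustrative only: how a "long" column's stats would be aggregated.
    for (int i : IExtrapolatePartStatus.indexMaps.get("long")) {
      System.out.println(IExtrapolatePartStatus.colStatNames[i]
          + " -> " + IExtrapolatePartStatus.aggrTypes[i]
          + " as " + IExtrapolatePartStatus.colStatTypes[i]);
    }

This prints LONG_LOW_VALUE -> Min, LONG_HIGH_VALUE -> Max, NUM_NULLS -> Sum, and NUM_DISTINCTS -> Max, all carried as Long.
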
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java
new file mode 100644
index 0000000..7fc04f1
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.Map;
+
+import org.apache.hadoop.hive.metastore.api.Decimal;
+
+public class LinearExtrapolatePartStatus implements IExtrapolatePartStatus {
+
+  @Override
+  public Object extrapolate(Object[] min, Object[] max, int colStatIndex,
+      Map<String, Integer> indexMap) {
+    int rightBorderInd = indexMap.size() - 1;
+    int minInd = indexMap.get((String) min[1]);
+    int maxInd = indexMap.get((String) max[1]);
+    if (minInd == maxInd) {
+      return min[0];
+    }
+    if (aggrTypes[colStatIndex] == AggrType.Max) {
+      if (minInd < maxInd) {
+        // right border is the max
+        if (colStatTypes[colStatIndex] == ColStatType.Long) {
+          return (Long) ((Long) min[0] + (((Long) max[0] - (Long) min[0])
+              * (rightBorderInd - minInd) / (maxInd - minInd)));
+        } else if (colStatTypes[colStatIndex] == ColStatType.Double) {
+          return (Double) ((Double) min[0] + (((Double) max[0] - (Double) min[0])
+              * (rightBorderInd - minInd) / (maxInd - minInd)));
+        } else {
+          Decimal dmax = (Decimal) max[0];
+          BigDecimal bdmax = new BigDecimal(dmax.toString());
+          double doublemax = bdmax.doubleValue();
+          Decimal dmin = (Decimal) min[0];
+          BigDecimal bdmin = new BigDecimal(dmin.toString());
+          double doublemin = bdmin.doubleValue();
+          double ret = doublemin + (doublemax - doublemin)
+              * (rightBorderInd - minInd) / (maxInd - minInd);
+          return createThriftDecimal(String.valueOf(ret));
+        }
+      } else {
+        // left border is the max
+        if (colStatTypes[colStatIndex] == ColStatType.Long) {
+          return (Long) ((Long) min[0] + ((Long) max[0] - (Long) min[0])
+              * minInd / (minInd - maxInd));
+        } else if (colStatTypes[colStatIndex] == ColStatType.Double) {
+          return (Double) ((Double) min[0] + ((Double) max[0] - (Double) min[0])
+              * minInd / (minInd - maxInd));
+        } else {
+          Decimal dmax = (Decimal) max[0];
+          BigDecimal bdmax = new BigDecimal(dmax.toString());
+          double doublemax = bdmax.doubleValue();
+          Decimal dmin = (Decimal) min[0];
+          BigDecimal bdmin = new BigDecimal(dmin.toString());
+          double doublemin = bdmin.doubleValue();
+          double ret = doublemin + (doublemax - doublemin) * minInd
+              / (minInd - maxInd);
+          return createThriftDecimal(String.valueOf(ret));
+        }
+      }
+    } else {
+      if (minInd < maxInd) {
+        // left border is the min
+        if (colStatTypes[colStatIndex] == ColStatType.Long) {
+          Long ret = (Long) max[0] - ((Long) max[0] - (Long) min[0]) * maxInd
+              / (maxInd - minInd);
+          return ret;
+        } else if (colStatTypes[colStatIndex] == ColStatType.Double) {
+          Double ret = (Double) max[0] - ((Double) max[0] - (Double) min[0])
+              * maxInd / (maxInd - minInd);
+          return ret;
+        } else {
+          Decimal dmax = (Decimal) max[0];
+          BigDecimal bdmax = new BigDecimal(dmax.toString());
+          double doublemax = bdmax.doubleValue();
+          Decimal dmin = (Decimal) min[0];
+          BigDecimal bdmin = new BigDecimal(dmin.toString());
+          double doublemin = bdmin.doubleValue();
+          double ret = doublemax - (doublemax - doublemin) * maxInd
+              / (maxInd - minInd);
+          return createThriftDecimal(String.valueOf(ret));
+        }
+      } else {
+        // right border is the min
+        if (colStatTypes[colStatIndex] == ColStatType.Long) {
+          Long ret = (Long) max[0] - ((Long) max[0] - (Long) min[0])
+              * (rightBorderInd - maxInd) / (minInd - maxInd);
+          return ret;
+        } else if (colStatTypes[colStatIndex] == ColStatType.Double) {
+          Double ret = (Double) max[0] - ((Double) max[0] - (Double) min[0])
+              * (rightBorderInd - maxInd) / (minInd - maxInd);
+          return ret;
+        } else {
+          Decimal dmax = (Decimal) max[0];
+          BigDecimal bdmax = new BigDecimal(dmax.toString());
+          double doublemax = bdmax.doubleValue();
+          Decimal dmin = (Decimal) min[0];
+          BigDecimal bdmin = new BigDecimal(dmin.toString());
+          double doublemin = bdmin.doubleValue();
+          double ret = doublemax - (doublemax - doublemin)
+              * (rightBorderInd - maxInd) / (minInd - maxInd);
+          return createThriftDecimal(String.valueOf(ret));
+        }
+      }
+    }
+  }
+
+  private static Decimal createThriftDecimal(String s) {
+    BigDecimal d = new BigDecimal(s);
+    return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()),
+        (short) d.scale());
+  }
+
+}
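
Concretely: the class fits a line through the two observed points (partition index, stat value) and extends it to whichever border the aggregation type calls for. A worked sketch (not part of the patch; values invented) for a Max-aggregated stat:

    import java.util.HashMap;
    import java.util.Map;

    // Illustrative only: four partitions, stats present only for year=2000
    // and year=2002; extrapolate NUM_DISTINCTS (colStatIndex 7, aggrType Max).
    Map<String, Integer> indexMap = new HashMap<String, Integer>();
    indexMap.put("year=2000", 0);
    indexMap.put("year=2001", 1);
    indexMap.put("year=2002", 2);
    indexMap.put("year=2003", 3);
    Object[] min = { Long.valueOf(10), "year=2000" }; // smallest observed value, its partition
    Object[] max = { Long.valueOf(30), "year=2002" }; // largest observed value, its partition
    Object estimated = new LinearExtrapolatePartStatus()
        .extrapolate(min, max, 7, indexMap);
    // minInd=0 < maxInd=2, so the right border (index 3) is the max side:
    // 10 + (30 - 10) * (3 - 0) / (2 - 0) = 40
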
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 767cffc..e7694b7 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -21,19 +21,18 @@
 import static org.apache.commons.lang.StringUtils.join;
 import static org.apache.commons.lang.StringUtils.repeat;
 
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
 import java.sql.Connection;
 import java.sql.SQLException;
-import java.sql.Statement;
 import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Date;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
-import java.util.concurrent.atomic.AtomicLong;
 
 import javax.jdo.PersistenceManager;
 import javax.jdo.Query;
@@ -43,10 +42,12 @@
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
@@ -65,9 +66,8 @@
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator;
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode;
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor;
-import org.apache.hadoop.hive.metastore.parser.FilterLexer;
 import org.apache.hadoop.hive.serde.serdeConstants;
-import org.datanucleus.store.schema.SchemaTool;
+import org.datanucleus.store.rdbms.query.ForwardQueryResult;
 
 import com.google.common.collect.Lists;
@@ -97,7 +97,7 @@
    * Whether direct SQL can be used with the current datastore backing {@link #pm}.
    */
   private final boolean isCompatibleDatastore;
-  
+
   public MetaStoreDirectSql(PersistenceManager pm) {
     this.pm = pm;
     Transaction tx = pm.currentTransaction();
@@ -893,33 +893,247 @@ public ColumnStatistics getTableStats(
     return result;
   }
 
-  public List<ColumnStatisticsObj> aggrColStatsForPartitions(String dbName, String tableName,
+  public AggrStats aggrColStatsForPartitions(String dbName, String tableName,
       List<String> partNames, List<String> colNames) throws MetaException {
-    String qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
-      + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
-      + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
-      + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
-      + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
-      + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
-      + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+    long partsFound = partsFoundForPartitions(dbName, tableName, partNames,
+        colNames);
+    List<ColumnStatisticsObj> stats = columnStatisticsObjForPartitions(dbName,
+        tableName, partNames, colNames, partsFound);
+    return new AggrStats(stats, partsFound);
+  }
+
+  private long partsFoundForPartitions(String dbName, String tableName,
+      List<String> partNames, List<String> colNames) throws MetaException {
+    long partsFound = 0;
     boolean doTrace = LOG.isDebugEnabled();
+    String qText = "select count(\"COLUMN_NAME\") from \"PART_COL_STATS\""
+        + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
+        + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")"
+        + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+        + " group by \"PARTITION_NAME\"";
     long start = doTrace ? System.nanoTime() : 0;
     Query query = pm.newQuery("javax.jdo.query.SQL", qText);
-    Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
-    if (qResult == null) {
-      query.closeAll();
-      return Lists.newArrayList();
-    }
-    List<Object[]> list = ensureList(qResult);
-    List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
-    for (Object[] row : list) {
-      colStats.add(prepareCSObj(row,0));
-    }
+    Object qResult = query.executeWithArray(prepareParams(dbName, tableName,
+        partNames, colNames));
     long end = doTrace ? System.nanoTime() : 0;
     timingTrace(doTrace, qText, start, end);
-    query.closeAll();
-    return colStats;
+    ForwardQueryResult fqr = (ForwardQueryResult) qResult;
+    List<Integer> colnumbers = new ArrayList<Integer>();
+    colnumbers.addAll(fqr);
+    for (Integer colnumber : colnumbers) {
+      if (colnumber == colNames.size())
+        partsFound++;
+    }
+    return partsFound;
+  }
+
+  private List<ColumnStatisticsObj> columnStatisticsObjForPartitions(
+      String dbName, String tableName, List<String> partNames,
+      List<String> colNames, long partsFound) throws MetaException {
+    String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
+        + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
+        + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
+        + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
+        + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? ";
+    String qText = null;
+    long start = 0;
+    long end = 0;
+    Query query = null;
+    boolean doTrace = LOG.isDebugEnabled();
+    Object qResult = null;
+    ForwardQueryResult fqr = null;
+    // Check if the status of all the columns of all the partitions exists
+    // Extrapolation is not needed.
+    if (partsFound == partNames.size()) {
+      qText = commonPrefix
+          + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")"
+          + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+          + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+      start = doTrace ? System.nanoTime() : 0;
+      query = pm.newQuery("javax.jdo.query.SQL", qText);
+      qResult = query.executeWithArray(prepareParams(dbName, tableName,
+          partNames, colNames));
+      if (qResult == null) {
+        query.closeAll();
+        return Lists.newArrayList();
+      }
+      end = doTrace ? System.nanoTime() : 0;
+      timingTrace(doTrace, qText, start, end);
+      List<Object[]> list = ensureList(qResult);
+      List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(
+          list.size());
+      for (Object[] row : list) {
+        colStats.add(prepareCSObj(row, 0));
+      }
+      query.closeAll();
+      return colStats;
+    } else {
+      // Extrapolation is needed for some columns.
+      // In this case, at least a column status for a partition is missing.
+      // We need to extrapolate this partition based on the other partitions
+      List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(
+          colNames.size());
+      qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") "
+          + " from \"PART_COL_STATS\""
+          + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
+          + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")"
+          + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+          + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+      start = doTrace ? System.nanoTime() : 0;
+      query = pm.newQuery("javax.jdo.query.SQL", qText);
+      qResult = query.executeWithArray(prepareParams(dbName, tableName,
+          partNames, colNames));
+      end = doTrace ? System.nanoTime() : 0;
+      timingTrace(doTrace, qText, start, end);
+      if (qResult == null) {
+        query.closeAll();
+        return Lists.newArrayList();
+      }
+      List<String> noExtraColumnNames = new ArrayList<String>();
+      Map<String, String[]> extraColumnNameTypeParts = new HashMap<String, String[]>();
+      List<Object[]> list = ensureList(qResult);
+      for (Object[] row : list) {
+        String colName = (String) row[0];
+        String colType = (String) row[1];
+        if ((Integer) row[2] == partNames.size() || (Integer) row[2] < 2) {
+          // Extrapolation is not needed for this column if
+          // count(\"PARTITION_NAME\")==partNames.size()
+          // Or, extrapolation is not possible for this column if
+          // count(\"PARTITION_NAME\")<2
+          noExtraColumnNames.add(colName);
+        } else {
+          extraColumnNameTypeParts.put(colName,
+              new String[] { colType, String.valueOf((Integer) row[2]) });
+        }
+      }
+      query.closeAll();
+      // Extrapolation is not needed for columns noExtraColumnNames
+      if (noExtraColumnNames.size() != 0) {
+        qText = commonPrefix
+            + " and \"COLUMN_NAME\" in (" + makeParams(noExtraColumnNames.size()) + ")"
+            + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+            + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+        start = doTrace ? System.nanoTime() : 0;
+        query = pm.newQuery("javax.jdo.query.SQL", qText);
+        qResult = query.executeWithArray(prepareParams(dbName, tableName,
+            partNames, noExtraColumnNames));
+        if (qResult == null) {
+          query.closeAll();
+          return Lists.newArrayList();
+        }
+        list = ensureList(qResult);
+        for (Object[] row : list) {
+          colStats.add(prepareCSObj(row, 0));
+        }
+        end = doTrace ? System.nanoTime() : 0;
+        timingTrace(doTrace, qText, start, end);
+        query.closeAll();
+      }
+      // Extrapolation is needed for extraColumnNames.
+      // give a sequence number for all the partitions
+      if (extraColumnNameTypeParts.size() != 0) {
+        Map<String, Integer> indexMap = new HashMap<String, Integer>();
+        for (int index = 0; index < partNames.size(); index++) {
+          indexMap.put(partNames.get(index), index);
+        }
+        // get sum for all columns to reduce the number of queries
+        Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>();
+        qText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\")"
+            + " from \"PART_COL_STATS\""
+            + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
+            + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) + ")"
+            + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+            + " group by \"COLUMN_NAME\"";
+        start = doTrace ? System.nanoTime() : 0;
+        query = pm.newQuery("javax.jdo.query.SQL", qText);
+        List<String> extraColumnNames = new ArrayList<String>();
+        extraColumnNames.addAll(extraColumnNameTypeParts.keySet());
+        qResult = query.executeWithArray(prepareParams(dbName, tableName,
+            partNames, extraColumnNames));
+        if (qResult == null) {
+          query.closeAll();
+          return Lists.newArrayList();
+        }
+        list = ensureList(qResult);
+        // see the indexes for colstats in IExtrapolatePartStatus
+        Integer[] sumIndex = new Integer[] { 6, 10, 11 };
+        for (Object[] row : list) {
+          Map<Integer, Object> indexToObject = new HashMap<Integer, Object>();
+          for (int ind = 1; ind < row.length; ind++) {
+            indexToObject.put(sumIndex[ind - 1], row[ind]);
+          }
+          sumMap.put((String) row[0], indexToObject);
+        }
+        end = doTrace ? System.nanoTime() : 0;
+        timingTrace(doTrace, qText, start, end);
+        query.closeAll();
+        for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts
+            .entrySet()) {
+          Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2];
+          String colName = entry.getKey();
+          String colType = entry.getValue()[0];
+          Long sumVal = Long.parseLong(entry.getValue()[1]);
+          // fill in colname
+          row[0] = colName;
+          // fill in coltype
+          row[1] = colType;
+          // use linear extrapolation. more complicated one can be added in the future.
+          IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus();
+          // fill in colstatus
+          Integer[] index = IExtrapolatePartStatus.indexMaps.get(colType
+              .toLowerCase());
+          // if the colType is not the known type, long, double, etc, then get all index.
+          if (index == null) {
+            index = IExtrapolatePartStatus.indexMaps.get("default");
+          }
+          for (int colStatIndex : index) {
+            String colStatName = IExtrapolatePartStatus.colStatNames[colStatIndex];
+            // if the aggregation type is sum, we do a scale-up
+            if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Sum) {
+              Long val = (Long) sumMap.get(colName).get(colStatIndex);
+              if (val == null) {
+                row[2 + colStatIndex] = null;
+              } else {
+                row[2 + colStatIndex] = (Long) (val / sumVal * (partNames
+                    .size()));
+              }
+            } else {
+              // if the aggregation type is min/max, we extrapolate from the
+              // left/right borders
+              qText = "select \"" + colStatName
+                  + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\""
+                  + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
+                  + " and \"COLUMN_NAME\" in (" + makeParams(1) + ")"
+                  + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+                  + " order by \"" + colStatName + "\"";
+              start = doTrace ? System.nanoTime() : 0;
+              query = pm.newQuery("javax.jdo.query.SQL", qText);
+              qResult = query.executeWithArray(prepareParams(dbName,
+                  tableName, partNames, Arrays.asList(colName)));
+              if (qResult == null) {
+                query.closeAll();
+                return Lists.newArrayList();
+              }
+              fqr = (ForwardQueryResult) qResult;
+              Object[] min = (Object[]) (fqr.get(0));
+              Object[] max = (Object[]) (fqr.get(fqr.size() - 1));
+              end = doTrace ? System.nanoTime() : 0;
+              timingTrace(doTrace, qText, start, end);
+              query.closeAll();
+              if (min[0] == null || max[0] == null) {
+                row[2 + colStatIndex] = null;
+              } else {
+                row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max,
+                    colStatIndex, indexMap);
+              }
+            }
+          }
+          colStats.add(prepareCSObj(row, 0));
+        }
+      }
+      return colStats;
+    }
+  }
 
   private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaException {
@@ -949,7 +1163,7 @@ private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaExcept
     return params;
   }
 
-  
+
   public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
       List<String> partNames, List<String> colNames) throws MetaException {
     if (colNames.isEmpty() || partNames.isEmpty()) {
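
For Sum-aggregated stats (NUM_NULLS, NUM_TRUES, NUM_FALSES) the code above scales the total observed over the n partitions that have stats up to all N requested partitions. Note that `val / sumVal * partNames.size()` divides first in long arithmetic, so the scale-up truncates; a small worked example (values invented):

    // Illustrative only: NUM_NULLS totals 10 over the 4 partitions with stats,
    // and 8 partitions were requested.
    long val = 10L, sumVal = 4L, total = 8L;
    long scaledAsWritten = val / sumVal * total; // 2 * 8 = 16 (divides first)
    long scaledExactly   = val * total / sumVal; // 80 / 4 = 20
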
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index a9f4be2..0693039 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -60,6 +60,7 @@
 import org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -5904,25 +5905,28 @@ protected ColumnStatistics getJdoResult(
 
   @Override
-  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+  public AggrStats get_aggr_stats_for(String dbName, String tblName,
       final List<String> partNames, final List<String> colNames)
       throws MetaException, NoSuchObjectException {
-
-    return new GetListHelper<ColumnStatisticsObj>(dbName, tblName, true, false) {
+    return new GetHelper<AggrStats>(dbName, tblName, true, false) {
       @Override
-      protected List<ColumnStatisticsObj> getSqlResult(
-          GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException {
-        return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, colNames);
+      protected AggrStats getSqlResult(GetHelper<AggrStats> ctx)
+          throws MetaException {
+        return directSql.aggrColStatsForPartitions(dbName, tblName, partNames,
+            colNames);
       }
-
       @Override
-      protected List<ColumnStatisticsObj> getJdoResult(
-          GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException,
-          NoSuchObjectException {
-        // This is fast path for query optimizations, if we can find this info quickly using
+      protected AggrStats getJdoResult(GetHelper<AggrStats> ctx)
+          throws MetaException, NoSuchObjectException {
+        // This is fast path for query optimizations, if we can find this info
+        // quickly using
         // directSql, do it. No point in failing back to slow path here.
         throw new MetaException("Jdo path is not implemented for stats aggr.");
       }
-    }.run(true);
+      @Override
+      protected String describeResult() {
+        return null;
+      }
+    }.run(true);
   }
 
   private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
index 0364385..e435d69 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
@@ -26,6 +26,7 @@
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -548,6 +549,6 @@ public void dropFunction(String dbName, String funcName)
    */
   public List<String> getFunctions(String dbName, String pattern) throws MetaException;
 
-  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+  public AggrStats get_aggr_stats_for(String dbName, String tblName,
     List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
 }
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
index 4eba2b0..3847d99 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
@@ -24,6 +24,7 @@
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -712,7 +713,7 @@ public Function getFunction(String dbName, String funcName)
   }
 
   @Override
-  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+  public AggrStats get_aggr_stats_for(String dbName,
       String tblName, List<String> partNames, List<String> colNames)
       throws MetaException {
     return null;
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
index 78ab19a..981fa1a 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
@@ -25,6 +25,7 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -729,7 +730,7 @@ public Function getFunction(String dbName, String funcName)
   }
 
   @Override
-  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+  public AggrStats get_aggr_stats_for(String dbName,
       String tblName, List<String> partNames, List<String> colNames)
       throws MetaException {
     return null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 8100b39..7cb7c5e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -224,7 +224,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa
       if (aggrStats.getPartsFound() != partNames.size() && colState != State.NONE) {
         LOG.debug("Column stats requested for : " + partNames.size() +" partitions. "
             + "Able to retrieve for " + aggrStats.getPartsFound() + " partitions");
-        stats.updateColumnStatsState(State.PARTIAL);
+        colState = State.PARTIAL;
       }
       stats.setColumnStatsState(colState);
     }
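
The StatsUtils hunk above keeps basic stats COMPLETE and only downgrades the column-stats state when fewer partitions had stats than were requested. The decision reduces to a comparison, roughly (a paraphrase with invented values, not the literal surrounding code):

    // Illustrative only: 4 partitions requested, stats physically present for 2.
    long partsFound = aggrStats.getPartsFound(); // e.g. 2
    int partsRequested = partNames.size();       // e.g. 4
    if (partsFound != partsRequested && colState != State.NONE) {
      colState = State.PARTIAL; // column stats are partly extrapolated
    }
    stats.setColumnStatsState(colState);
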
diff --git a/ql/src/test/queries/clientpositive/extrapolate_part_stats_full.q b/ql/src/test/queries/clientpositive/extrapolate_part_stats_full.q
new file mode 100644
index 0000000..00c9b53
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/extrapolate_part_stats_full.q
@@ -0,0 +1,52 @@
+set hive.stats.fetch.column.stats=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+create table if not exists ext_loc (
+  state string,
+  locid int,
+  zip int,
+  year string
+) row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_full.txt' OVERWRITE INTO TABLE ext_loc;
+
+create table if not exists loc_orc_1d (
+  state string,
+  locid int,
+  zip int
+) partitioned by(year string) stored as orc;
+
+insert overwrite table loc_orc_1d partition(year) select * from ext_loc;
+
+analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid;
+
+analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid;
+
+describe formatted loc_orc_1d.state PARTITION(year='2001');
+
+-- basicStatState: COMPLETE colStatState: PARTIAL
+explain extended select state from loc_orc_1d;
+
+-- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
+-- basicStatState: COMPLETE colStatState: PARTIAL
+explain extended select state,locid from loc_orc_1d;
+
+create table if not exists loc_orc_2d (
+  state string,
+  locid int
+) partitioned by(zip int, year string) stored as orc;
+
+insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc;
+
+analyze table loc_orc_2d partition(zip=94086, year='2000') compute statistics for columns state,locid;
+
+analyze table loc_orc_2d partition(zip=94087, year='2000') compute statistics for columns state,locid;
+
+analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid;
+
+analyze table loc_orc_2d partition(zip=94087, year='2001') compute statistics for columns state,locid;
+
+explain extended select state from loc_orc_2d;
+
+explain extended select state,locid from loc_orc_2d;
diff --git a/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial.q b/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial.q
new file mode 100644
index 0000000..8ae9a90
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial.q
@@ -0,0 +1,58 @@
+set hive.stats.fetch.column.stats=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+create table if not exists ext_loc (
+  state string,
+  locid int,
+  zip int,
+  year string
+) row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial.txt' OVERWRITE INTO TABLE ext_loc;
+
+create table if not exists loc_orc_1d (
+  state string,
+  locid int,
+  zip int
+) partitioned by(year string) stored as orc;
+
+insert overwrite table loc_orc_1d partition(year) select * from ext_loc;
+
+analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid;
+
+analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid;
+
+describe formatted loc_orc_1d.state PARTITION(year='2001');
+
+describe formatted loc_orc_1d.state PARTITION(year='2002');
+
+-- basicStatState: COMPLETE colStatState: PARTIAL
+explain extended select state from loc_orc_1d;
+
+-- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
+-- basicStatState: COMPLETE colStatState: PARTIAL
+explain extended select state,locid from loc_orc_1d;
+
+analyze table loc_orc_1d partition(year='2000') compute statistics for columns state;
+
+analyze table loc_orc_1d partition(year='2003') compute statistics for columns state;
+
+explain extended select state from loc_orc_1d;
+
+explain extended select state,locid from loc_orc_1d;
+
+create table if not exists loc_orc_2d (
+  state string,
+  locid int
+) partitioned by(zip int, year string) stored as orc;
+
+insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc;
+
+analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid;
+
+analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid;
+
+explain extended select state from loc_orc_2d;
+
+explain extended select state,locid from loc_orc_2d;
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 10993c3..6262d37 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -366,14 +366,14 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: state (type: string)
               outputColumnNames: _col0
-              Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
              File Output Operator
                compressed: false
-                Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -403,14 +403,14 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
              expressions: state (type: string), locid (type: int)
              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL
              File Output Operator
                compressed: false
-                Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
new file mode 100644
index 0000000..ef63e74
--- /dev/null
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
@@ -0,0 +1,1007 @@
+PREHOOK: query: create table if not exists ext_loc (
+  state string,
+  locid int,
+  zip int,
+  year string
+) row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ext_loc
+POSTHOOK:
query: create table if not exists ext_loc ( + state string, + locid int, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ext_loc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_full.txt' OVERWRITE INTO TABLE ext_loc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ext_loc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_full.txt' OVERWRITE INTO TABLE ext_loc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ext_loc +PREHOOK: query: create table if not exists loc_orc_1d ( + state string, + locid int, + zip int +) partitioned by(year string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: create table if not exists loc_orc_1d ( + state string, + locid int, + zip int +) partitioned by(year string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_1d +PREHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +PREHOOK: type: QUERY +PREHOOK: Input: default@ext_loc +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ext_loc +POSTHOOK: Output: default@loc_orc_1d@year=2000 +POSTHOOK: Output: default@loc_orc_1d@year=2001 +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: 
describe formatted loc_orc_1d.state PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 0.75 2 from deserializer +PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 6 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 184 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 342 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2001 + 
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 368 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 364 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Truncated Path -> Alias: + /loc_orc_1d/year=2000 [loc_orc_1d] + /loc_orc_1d/year=2001 [loc_orc_1d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. 
Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 6 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 184 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 342 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A 
masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 368 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 364 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Truncated Path -> Alias: + /loc_orc_1d/year=2000 [loc_orc_1d] + /loc_orc_1d/year=2001 [loc_orc_1d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table if not exists loc_orc_2d ( + state string, + locid int +) partitioned by(zip int, year string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_2d +POSTHOOK: query: create table if not exists loc_orc_2d ( + state string, + locid int +) partitioned by(zip int, year string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_2d +PREHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc +PREHOOK: type: QUERY +PREHOOK: Input: default@ext_loc +PREHOOK: Output: default@loc_orc_2d +POSTHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ext_loc +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2000 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2000 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2001 +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, 
year='2000') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_2d +PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2000 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2000') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_2d +POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2000 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2000') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_2d +PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2000') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_2d +POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_2d +PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_2d +POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2001') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_2d +PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2001') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_2d +POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 +#### A masked pattern was here #### +PREHOOK: query: explain extended select state from loc_orc_2d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state from loc_orc_2d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_2d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_2d + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 89 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 260 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 3 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 267 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 257 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d 
+ numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 88 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 247 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 88 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 247 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Truncated Path -> Alias: + /loc_orc_2d/zip=94086/year=2000 [loc_orc_2d] + /loc_orc_2d/zip=94086/year=2001 [loc_orc_2d] + /loc_orc_2d/zip=94087/year=2000 [loc_orc_2d] + /loc_orc_2d/zip=94087/year=2001 [loc_orc_2d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select state,locid from loc_orc_2d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state,locid from loc_orc_2d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_2d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_2d + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: state (type: string), locid 
(type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 89 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 260 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 3 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 267 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 257 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + 
partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 88 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 247 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 88 + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 247 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid + columns.comments + columns.types string:int +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Truncated Path -> Alias: + /loc_orc_2d/zip=94086/year=2000 [loc_orc_2d] + /loc_orc_2d/zip=94086/year=2001 [loc_orc_2d] + /loc_orc_2d/zip=94087/year=2000 [loc_orc_2d] + /loc_orc_2d/zip=94087/year=2001 [loc_orc_2d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff 
--git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out new file mode 100644 index 0000000..cbe210b --- /dev/null +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -0,0 +1,2396 @@ +PREHOOK: query: create table if not exists ext_loc ( + state string, + locid int, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ext_loc +POSTHOOK: query: create table if not exists ext_loc ( + state string, + locid int, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ext_loc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial.txt' OVERWRITE INTO TABLE ext_loc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ext_loc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial.txt' OVERWRITE INTO TABLE ext_loc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ext_loc +PREHOOK: query: create table if not exists loc_orc_1d ( + state string, + locid int, + zip int +) partitioned by(year string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: create table if not exists loc_orc_1d ( + state string, + locid int, + zip int +) partitioned by(year string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_1d +PREHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +PREHOOK: type: QUERY +PREHOOK: Input: default@ext_loc +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ext_loc +POSTHOOK: Output: default@loc_orc_1d@year=2000 +POSTHOOK: Output: default@loc_orc_1d@year=2001 +POSTHOOK: Output: default@loc_orc_1d@year=2002 +POSTHOOK: Output: default@loc_orc_1d@year=2003 +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).locid 
SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +PREHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2002 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2002 +#### A masked pattern was here #### +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 0.75 2 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 6 3.0 3 from deserializer +PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 20 Data size: 1866 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 184 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 342 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 368 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 364 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2002 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition 
values: + year 2002 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 570 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 383 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2003 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 8 + partition_columns year + partition_columns.types string + rawDataSize 744 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 390 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Truncated Path -> Alias: + /loc_orc_1d/year=2000 [loc_orc_1d] + /loc_orc_1d/year=2001 [loc_orc_1d] + /loc_orc_1d/year=2002 [loc_orc_1d] + /loc_orc_1d/year=2003 [loc_orc_1d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. 
Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 20 Data size: 1866 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 184 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 342 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A 
masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 368 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 364 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2002 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 570 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 383 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2003 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 8 + partition_columns year + partition_columns.types string + rawDataSize 744 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 390 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + 
columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Truncated Path -> Alias: + /loc_orc_1d/year=2000 [loc_orc_1d] + /loc_orc_1d/year=2001 [loc_orc_1d] + /loc_orc_1d/year=2002 [loc_orc_1d] + /loc_orc_1d/year=2003 [loc_orc_1d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2003 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2003 +#### A masked pattern was here #### +PREHOOK: query: explain extended select state from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 20 Data size: 1866 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + 
partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 184 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 342 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 368 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 364 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2002 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 570 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 383 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2003 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 8 + partition_columns year + partition_columns.types string + rawDataSize 744 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 390 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Truncated Path -> Alias: + /loc_orc_1d/year=2000 [loc_orc_1d] + /loc_orc_1d/year=2001 [loc_orc_1d] + /loc_orc_1d/year=2002 [loc_orc_1d] + /loc_orc_1d/year=2003 [loc_orc_1d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select state,locid from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state,locid from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 20 Data size: 1866 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2000 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 184 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 342 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 368 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 364 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2002 + 
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 570 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 383 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d +#### A masked pattern was here #### + Partition + base file name: year=2003 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 8 + partition_columns year + partition_columns.types string + rawDataSize 744 + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 390 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.comments + columns.types string:int:int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Truncated Path -> Alias: + /loc_orc_1d/year=2000 [loc_orc_1d] + /loc_orc_1d/year=2001 [loc_orc_1d] + /loc_orc_1d/year=2002 [loc_orc_1d] + /loc_orc_1d/year=2003 [loc_orc_1d] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table if not exists loc_orc_2d ( + state string, + locid int +) partitioned by(zip int, year string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_2d +POSTHOOK: query: create table if not exists loc_orc_2d ( + state string, + locid int +) partitioned by(zip int, year string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@loc_orc_2d +PREHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc +PREHOOK: type: QUERY +PREHOOK: Input: default@ext_loc +PREHOOK: Output: default@loc_orc_2d +POSTHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ext_loc +POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2001 +POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2002 +POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2003 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2000 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2002 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2003 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2000 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2001 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2002 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2003 +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d 
PARTITION(zip=94087,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@loc_orc_2d
+PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2001
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@loc_orc_2d
+POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001
+#### A masked pattern was here ####
+PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@loc_orc_2d
+PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2002
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@loc_orc_2d
+POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2002
+#### A masked pattern was here ####
+PREHOOK: query: explain extended select state from loc_orc_2d
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select state from loc_orc_2d
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ loc_orc_2d
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ state
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc_2d
+ Statistics: Num rows: 20 Data size: 1788 Basic stats: COMPLETE Column stats: PARTIAL
+ GatherStats: false
+ Select Operator
+ expressions: state (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2001
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2001
+ zip 43201
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 90
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 264
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2002
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2002
+ zip 43201
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 2
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 182
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 278
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2003
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2003
+ zip 43201
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 3
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 267
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 280
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2000
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2000
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 89
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 260
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2001
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2001
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 2
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 176
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 257
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2002
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2002
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 91
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 269
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2003
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2003
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 2
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 180
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 278
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2000
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2000
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 88
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 247
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2001
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2001
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 88
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 247
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2002
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2002
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 3
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 273
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 277
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2003
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2003
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 3
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 264
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 271
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+ Truncated Path -> Alias:
+ /loc_orc_2d/zip=43201/year=2001 [loc_orc_2d]
+ /loc_orc_2d/zip=43201/year=2002 [loc_orc_2d]
+ /loc_orc_2d/zip=43201/year=2003 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2000 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2001 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2002 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2003 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2000 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2001 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2002 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2003 [loc_orc_2d]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain extended select state,locid from loc_orc_2d
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select state,locid from loc_orc_2d
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ loc_orc_2d
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ state
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ locid
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc_2d
+ Statistics: Num rows: 20 Data size: 1788 Basic stats: COMPLETE Column stats: PARTIAL
+ GatherStats: false
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2001
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2001
+ zip 43201
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 90
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 264
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2002
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2002
+ zip 43201
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 2
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 182
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 278
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2003
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2003
+ zip 43201
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 3
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 267
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 280
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2000
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2000
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 89
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 260
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2001
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2001
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 2
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 176
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 257
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2002
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2002
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 91
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 269
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2003
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2003
+ zip 94086
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 2
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 180
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 278
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2000
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2000
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 88
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 247
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2001
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2001
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 1
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 88
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 247
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2002
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2002
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 3
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 273
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 277
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+#### A masked pattern was here ####
+ Partition
+ base file name: year=2003
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ year 2003
+ zip 94087
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ numFiles 1
+ numRows 3
+ partition_columns zip/year
+ partition_columns.types int:string
+ rawDataSize 264
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 271
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ columns state,locid
+ columns.comments
+ columns.types string:int
+#### A masked pattern was here ####
+ name default.loc_orc_2d
+ partition_columns zip/year
+ partition_columns.types int:string
+ serialization.ddl struct loc_orc_2d { string state, i32 locid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc_2d
+ name: default.loc_orc_2d
+ Truncated Path -> Alias:
+ /loc_orc_2d/zip=43201/year=2001 [loc_orc_2d]
+ /loc_orc_2d/zip=43201/year=2002 [loc_orc_2d]
+ /loc_orc_2d/zip=43201/year=2003 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2000 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2001 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2002 [loc_orc_2d]
+ /loc_orc_2d/zip=94086/year=2003 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2000 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2001 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2002 [loc_orc_2d]
+ /loc_orc_2d/zip=94087/year=2003 [loc_orc_2d]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+