diff --git a/data/files/extrapolate_stats_partial_ndv.txt b/data/files/extrapolate_stats_partial_ndv.txt new file mode 100644 index 0000000..9af6d30 --- /dev/null +++ b/data/files/extrapolate_stats_partial_ndv.txt @@ -0,0 +1,20 @@ +|1|1.0E3|94087|2000 +O|2|1.01E3|94086|2000 +|1|0.01E3|94087|2001 +H|2|2.0E3|94086|2001 +|3|1.0E3|94086|2001 +OH|4|1.01E3|43201|2001 +oh1|1|1.0E2|94087|2002 +OH2|2|9.0E2|43201|2002 +oh3|3|1.0E2|94087|2002 +OH4|4|9.1E2|94086|2002 +oh5|4|9.0E2|43201|2002 +OH6|5|0.01E3|94087|2002 +|31|1.0E3|94087|2003 +OH33|1|1.01E3|43201|2003 +|3|2.0E3|94087|2003 +OH|1|1.0E3|94086|2003 +|4|2.0E3|43201|2003 +OH|1|1.0E3|94087|2003 +|1|2.0E3|43201|2003 +OH|5|1.0E3|94086|2003 diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java index 74f1b01..92ca3ad 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java @@ -28,18 +28,27 @@ static String[] colStatNames = new String[] { "LONG_LOW_VALUE", "LONG_HIGH_VALUE", "DOUBLE_LOW_VALUE", "DOUBLE_HIGH_VALUE", "BIG_DECIMAL_LOW_VALUE", "BIG_DECIMAL_HIGH_VALUE", "NUM_NULLS", - "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES" }; + "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES", + "AVG_NDV_LONG", "AVG_NDV_DOUBLE", "AVG_NDV_DECIMAL" }; /** * The indexes for colstats. */ static HashMap indexMaps = new HashMap(){{ - put("long", new Integer [] {0,1,6,7}); - put("double", new Integer [] {2,3,6,7}); + put("bigint", new Integer [] {0,1,6,7,12}); + put("int", new Integer [] {0,1,6,7,12}); + put("smallint", new Integer [] {0,1,6,7,12}); + put("tinyint", new Integer [] {0,1,6,7,12}); + put("timestamp", new Integer [] {0,1,6,7,12}); + put("long", new Integer [] {0,1,6,7,12}); + put("double", new Integer [] {2,3,6,7,13}); + put("float", new Integer [] {2,3,6,7,13}); + put("varchar", new Integer [] {8,9,6,7}); + put("char", new Integer [] {8,9,6,7}); put("string", new Integer [] {8,9,6,7}); put("boolean", new Integer [] {10,11,6}); put("binary", new Integer [] {8,9,6}); - put("decimal", new Integer [] {4,5,6,7}); + put("decimal", new Integer [] {4,5,6,7,14}); put("default", new Integer [] {0,1,2,3,4,5,6,7,8,9,10,11}); }}; @@ -53,19 +62,20 @@ static ColStatType[] colStatTypes = new ColStatType[] { ColStatType.Long, ColStatType.Long, ColStatType.Double, ColStatType.Double, ColStatType.Decimal, ColStatType.Decimal, ColStatType.Long, - ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long, - ColStatType.Long }; + ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long, ColStatType.Long, + ColStatType.Double, ColStatType.Double, ColStatType.Double }; /** * The sequence of aggregation function on colStats. 
*/ static enum AggrType { - Min, Max, Sum + Min, Max, Sum, Avg } static AggrType[] aggrTypes = new AggrType[] { AggrType.Min, AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Sum, - AggrType.Max, AggrType.Max, AggrType.Max, AggrType.Sum, AggrType.Sum }; + AggrType.Max, AggrType.Max, AggrType.Max, AggrType.Sum, AggrType.Sum, + AggrType.Avg, AggrType.Avg, AggrType.Avg}; public Object extrapolate(Object[] min, Object[] max, int colStatIndex, Map indexMap); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java index 7fc04f1..f4e5ef7 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java @@ -19,11 +19,8 @@ package org.apache.hadoop.hive.metastore; import java.math.BigDecimal; -import java.nio.ByteBuffer; import java.util.Map; -import org.apache.hadoop.hive.metastore.api.Decimal; - public class LinearExtrapolatePartStatus implements IExtrapolatePartStatus { @Override @@ -35,6 +32,15 @@ public Object extrapolate(Object[] min, Object[] max, int colStatIndex, if (minInd == maxInd) { return min[0]; } + // Note that recent versions of the metastore store decimal values as strings. + double decimalmin = 0; + double decimalmax = 0; + if (colStatTypes[colStatIndex] == ColStatType.Decimal) { + BigDecimal bdmin = new BigDecimal(min[0].toString()); + decimalmin = bdmin.doubleValue(); + BigDecimal bdmax = new BigDecimal(max[0].toString()); + decimalmax = bdmax.doubleValue(); + } if (aggrTypes[colStatIndex] == AggrType.Max) { if (minInd < maxInd) { // right border is the max @@ -45,15 +51,9 @@ public Object extrapolate(Object[] min, Object[] max, int colStatIndex, return (Double) ((Double) min[0] + (((Double) max[0] - (Double) min[0]) * (rightBorderInd - minInd) / (maxInd - minInd))); } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemin + (doublemax - doublemin) + double ret = decimalmin + (decimalmax - decimalmin) * (rightBorderInd - minInd) / (maxInd - minInd); - return createThriftDecimal(String.valueOf(ret)); + return String.valueOf(ret); } } else { // left border is the max @@ -62,17 +62,11 @@ public Object extrapolate(Object[] min, Object[] max, int colStatIndex, * minInd / (minInd - maxInd)); } else if (colStatTypes[colStatIndex] == ColStatType.Double) { return (Double) ((Double) min[0] + ((Double) max[0] - (Double) min[0]) - * minInd / (maxInd - minInd)); + * minInd / (minInd - maxInd)); } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemin + (doublemax - doublemin) * minInd - / (maxInd - minInd); - return createThriftDecimal(String.valueOf(ret)); + double ret = decimalmin + (decimalmax - decimalmin) * minInd + / (minInd - maxInd); + return String.valueOf(ret); } } } else { @@ -87,16 +81,9 @@ public Object extrapolate(Object[] min, Object[] max, int colStatIndex, * maxInd / (maxInd - minInd); return ret; } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal 
bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemax - (doublemax - doublemin) * maxInd + double ret = decimalmax - (decimalmax - decimalmin) * maxInd / (maxInd - minInd); - return createThriftDecimal(String.valueOf(ret)); - + return String.valueOf(ret); } } else { // right border is the min @@ -109,24 +96,11 @@ public Object extrapolate(Object[] min, Object[] max, int colStatIndex, * (rightBorderInd - maxInd) / (minInd - maxInd); return ret; } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemax - (doublemax - doublemin) + double ret = decimalmax - (decimalmax - decimalmin) * (rightBorderInd - maxInd) / (minInd - maxInd); - return createThriftDecimal(String.valueOf(ret)); + return String.valueOf(ret); } } } } - - private static Decimal createThriftDecimal(String s) { - BigDecimal d = new BigDecimal(s); - return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), - (short) d.scale()); - } - }
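The min/max branches above all evaluate the same two-point line through the border partitions that have stats, solved for the requested border; only the sign of the index denominator differs between the max and min cases (the Double change in this hunk corrects exactly that denominator). A minimal standalone sketch of the max case, with illustrative names and values that are not part of the patch:

public class LinearExtrapolationSketch {
  // Two known points (minInd, minVal) and (maxInd, maxVal) define a line;
  // read it off at the right border partition index, as the patched
  // Long/Double/Decimal branches do for a max-type stat.
  static double extrapolateMax(double minVal, int minInd, double maxVal,
      int maxInd, int rightBorderInd) {
    return minVal + (maxVal - minVal) * (rightBorderInd - minInd) / (maxInd - minInd);
  }

  public static void main(String[] args) {
    // Stats exist only for partitions 1 and 2 (values 4.0 and 6.0) out of 0..3,
    // so the max extrapolated to partition 3 lands on the same line: prints 8.0.
    System.out.println(extrapolateMax(4.0, 1, 6.0, 2, 3));
  }
}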
String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", " + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), " - + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " - + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\"" + + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " + + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), " + + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as double))," + + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "; String queryText = null; long start = 0; @@ -1151,7 +1161,7 @@ private long partsFoundForPartitions(String dbName, String tableName, List colStats = new ArrayList( list.size()); for (Object[] row : list) { - colStats.add(prepareCSObj(row, 0)); + colStats.add(prepareCSObjWithAvgNDV(row, 0)); } query.closeAll(); return colStats; @@ -1211,7 +1221,7 @@ private long partsFoundForPartitions(String dbName, String tableName, } list = ensureList(qResult); for (Object[] row : list) { - colStats.add(prepareCSObj(row, 0)); + colStats.add(prepareCSObjWithAvgNDV(row, 0)); } end = doTrace ? System.nanoTime() : 0; timingTrace(doTrace, queryText, start, end); @@ -1268,8 +1278,14 @@ private long partsFoundForPartitions(String dbName, String tableName, // use linear extrapolation. more complicated one can be added in the future. IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus(); // fill in colstatus - Integer[] index = IExtrapolatePartStatus.indexMaps.get(colType - .toLowerCase()); + Integer[] index = null; + boolean decimal = false; + if (colType.toLowerCase().startsWith("decimal")) { + index = IExtrapolatePartStatus.indexMaps.get("decimal"); + decimal = true; + } else { + index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase()); + } //if the colType is not the known type, long, double, etc, then get all index. if (index == null) { index = IExtrapolatePartStatus.indexMaps.get("default"); @@ -1285,16 +1301,23 @@ private long partsFoundForPartitions(String dbName, String tableName, Long val = extractSqlLong(o); row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size())); } - } else { + } else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min + || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) { // if the aggregation type is min/max, we extrapolate from the // left/right borders - queryText = "select \"" - + colStatName - + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" - + " and \"COLUMN_NAME\" = ?" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" - + " order by \'" + colStatName + "\'"; + if (!decimal) { + queryText = "select \"" + colStatName + + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" 
+ + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + " order by \"" + colStatName + "\""; + } else { + queryText = "select \"" + colStatName + + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + " order by cast(\"" + colStatName + "\" as decimal)"; + } start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); qResult = executeWithArray(query, prepareParams( @@ -1315,9 +1338,35 @@ private long partsFoundForPartitions(String dbName, String tableName, row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex, indexMap); } + } else { + // if the aggregation type is avg, we use the average on the existing ones. + queryText = "select " + + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as double))," + + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + + " from \"PART_COL_STATS\"" + + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + + " and \"COLUMN_NAME\" = ?" + + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + " group by \"COLUMN_NAME\""; + start = doTrace ? System.nanoTime() : 0; + query = pm.newQuery("javax.jdo.query.SQL", queryText); + qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, Arrays.asList(colName)), queryText); + if (qResult == null) { + query.closeAll(); + return Lists.newArrayList(); + } + fqr = (ForwardQueryResult) qResult; + Object[] avg = (Object[]) (fqr.get(0)); + //colStatIndex=12,13,14 respond to "AVG_LONG", "AVG_DOUBLE", "AVG_DECIMAL" + row[2 + colStatIndex] = avg[colStatIndex - 12]; + end = doTrace ? 
System.nanoTime() : 0; + timingTrace(doTrace, queryText, start, end); + query.closeAll(); } } - colStats.add(prepareCSObj(row, 0)); + colStats.add(prepareCSObjWithAvgNDV(row, 0)); } } return colStats; @@ -1335,6 +1384,16 @@ private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaExcept return cso; } + private ColumnStatisticsObj prepareCSObjWithAvgNDV(Object[] row, int i) throws MetaException { + ColumnStatisticsData data = new ColumnStatisticsData(); + ColumnStatisticsObj cso = new ColumnStatisticsObj((String) row[i++], (String) row[i++], data); + Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++], declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++], avgLong = row[i++], avgDouble = row[i++], avgDecimal = row[i++]; + StatObjectConverter + .fillColumnStatisticsData(cso.getColType(), data, llow, lhigh, dlow, dhigh, declow, + dechigh, nulls, dist, avglen, maxlen, trues, falses, avgLong, avgDouble, avgDecimal); + return cso; + } + private Object[] prepareParams(String dbName, String tableName, List partNames, List colNames) throws MetaException { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index 475883b..9cf5653 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -476,6 +476,107 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData } } + public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, + Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, + Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses, + Object avgLong, Object avgDouble, Object avgDecimal) throws MetaException { + colType = colType.toLowerCase(); + if (colType.equals("boolean")) { + BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); + boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses)); + boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues)); + boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + data.setBooleanStats(boolStats); + } else if (colType.equals("string") || colType.startsWith("varchar") + || colType.startsWith("char")) { + StringColumnStatsData stringStats = new StringColumnStatsData(); + stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + stringStats.setAvgColLen((Double) avglen); + stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); + stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + data.setStringStats(stringStats); + } else if (colType.equals("binary")) { + BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); + binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + binaryStats.setAvgColLen((Double) avglen); + binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); + data.setBinaryStats(binaryStats); + } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") + || colType.equals("tinyint") || colType.equals("timestamp")) { + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + if (lhigh != null) { + longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh)); + 
} + if (llow != null) { + longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow)); + } + if (lhigh != null && llow != null && avgLong != null + && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) { + longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong((MetaStoreDirectSql + .extractSqlLong(lhigh) - MetaStoreDirectSql.extractSqlLong(llow)) + / MetaStoreDirectSql.extractSqlDouble(avgLong))); + } + else { + longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + } + data.setLongStats(longStats); + } else if (colType.equals("double") || colType.equals("float")) { + DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + if (dhigh != null) { + doubleStats.setHighValue((Double) dhigh); + } + if (dlow != null) { + doubleStats.setLowValue((Double) dlow); + } + if (dhigh != null && dlow != null && avgDouble != null + && MetaStoreDirectSql.extractSqlDouble(avgDouble) != 0.0) { + doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong((MetaStoreDirectSql + .extractSqlDouble(dhigh) - MetaStoreDirectSql.extractSqlDouble(dlow)) + / MetaStoreDirectSql.extractSqlDouble(avgDouble))); + } + else { + doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + } + data.setDoubleStats(doubleStats); + } else if (colType.startsWith("decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + Decimal low = null; + Decimal high = null; + BigDecimal blow = null; + BigDecimal bhigh = null; + if(dechigh instanceof BigDecimal) { + bhigh = (BigDecimal) dechigh; + high = new Decimal(ByteBuffer.wrap(bhigh.unscaledValue().toByteArray()), (short)bhigh.scale()); + } + else if(dechigh instanceof String) { + bhigh = new BigDecimal((String) dechigh); + high = createThriftDecimal((String) dechigh); + } + decimalStats.setHighValue(high); + if(declow instanceof BigDecimal) { + blow = (BigDecimal) declow; + low = new Decimal(ByteBuffer.wrap(blow.unscaledValue().toByteArray()), (short)blow.scale()); + } + else if(declow instanceof String) { + blow = new BigDecimal((String) declow); + low = createThriftDecimal((String) declow); + } + decimalStats.setLowValue(low); + if (dechigh != null && declow != null && avgDecimal != null + && MetaStoreDirectSql.extractSqlDouble(avgDecimal) != 0.0) { + decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(bhigh.subtract(blow).floatValue() + / MetaStoreDirectSql.extractSqlDouble(avgDecimal))); + } + else { + decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + } + data.setDecimalStats(decimalStats); + } + } + private static Decimal createThriftDecimal(String s) { BigDecimal d = new BigDecimal(s); return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale());
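The new fillColumnStatisticsData overload reduces NDV extrapolation to one estimator: each partition with stats contributes a density (high - low) / ndv, the avg(...) SQL above averages those densities, and the aggregate NDV is the global value range divided by that average. A standalone sketch under those assumptions, with illustrative names and numbers that are not part of the patch:

public class NdvExtrapolationSketch {
  // Estimate table-level NDV from the global value range and the average
  // per-partition density avg((high - low) / ndv); the real code falls back
  // to max(NUM_DISTINCTS) when any input is null or the average is zero.
  static long estimateNumDVs(double globalLow, double globalHigh, double avgDensity) {
    return (long) ((globalHigh - globalLow) / avgDensity);
  }

  public static void main(String[] args) {
    // Partitions (low=0, high=100, ndv=50) and (low=100, high=200, ndv=50)
    // both have density 2.0, so the estimate is (200 - 0) / 2.0 = 100 distinct
    // values, where aggregating by max(NUM_DISTINCTS) would report only 50.
    double avgDensity = ((100.0 - 0.0) / 50 + (200.0 - 100.0) / 50) / 2;
    System.out.println(estimateNumDVs(0.0, 200.0, avgDensity));
  }
}

Averaging densities rather than taking max(NUM_DISTINCTS) is what lets distinct values in non-overlapping partitions add up instead of being capped by the largest single partition.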
diff --git a/ql/src/test/queries/clientpositive/extrapolate_part_stats_full.q b/ql/src/test/queries/clientpositive/extrapolate_part_stats_full.q index 00c9b53..c32dda6 100644 --- a/ql/src/test/queries/clientpositive/extrapolate_part_stats_full.q +++ b/ql/src/test/queries/clientpositive/extrapolate_part_stats_full.q @@ -2,7 +2,9 @@ set hive.stats.fetch.column.stats=true; set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict; -create table if not exists ext_loc ( +drop table if exists ext_loc; + +create table ext_loc ( state string, locid int, zip int, @@ -11,7 +13,9 @@ create table if not exists ext_loc ( LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_full.txt' OVERWRITE INTO TABLE ext_loc; -create table if not exists loc_orc_1d ( +drop table if exists loc_orc_1d; + +create table loc_orc_1d ( state string, locid int, zip int @@ -32,7 +36,9 @@ explain extended select state from loc_orc_1d; -- basicStatState: COMPLETE colStatState: PARTIAL explain extended select state,locid from loc_orc_1d; -create table if not exists loc_orc_2d ( +drop table if exists loc_orc_2d; + +create table loc_orc_2d ( state string, locid int ) partitioned by(zip int, year string) stored as orc; diff --git a/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial.q b/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial.q index 8ae9a90..ca1ec23 100644 --- a/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial.q +++ b/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial.q @@ -2,7 +2,9 @@ set hive.stats.fetch.column.stats=true; set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict; -create table if not exists ext_loc ( +drop table if exists ext_loc; + +create table ext_loc ( state string, locid int, zip int, @@ -11,7 +13,9 @@ create table if not exists ext_loc ( LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial.txt' OVERWRITE INTO TABLE ext_loc; -create table if not exists loc_orc_1d ( +drop table if exists loc_orc_1d; + +create table loc_orc_1d ( state string, locid int, zip int @@ -42,7 +46,9 @@ explain extended select state from loc_orc_1d; explain extended select state,locid from loc_orc_1d; -create table if not exists loc_orc_2d ( +drop table if exists loc_orc_2d; + +create table loc_orc_2d ( state string, locid int ) partitioned by(zip int, year string) stored as orc; diff --git a/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial_ndv.q b/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial_ndv.q new file mode 100644 index 0000000..c42a608 --- /dev/null +++ b/ql/src/test/queries/clientpositive/extrapolate_part_stats_partial_ndv.q @@ -0,0 +1,98 @@ +set hive.stats.fetch.column.stats=true; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +drop table if exists ext_loc; + +create table ext_loc ( + state string, + locid double, + cnt decimal, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial_ndv.txt' OVERWRITE INTO TABLE ext_loc; + +drop table if exists loc_orc_1d; + +create table loc_orc_1d ( + state string, + locid double, + cnt decimal, + zip int +) partitioned by(year string) stored as orc; + +insert overwrite table loc_orc_1d partition(year) select * from ext_loc; + +analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid,cnt,zip; + +analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid,cnt,zip; + +describe formatted loc_orc_1d.state PARTITION(year='2001'); + +describe formatted loc_orc_1d.state PARTITION(year='2002'); + +describe formatted loc_orc_1d.locid PARTITION(year='2001'); + +describe formatted loc_orc_1d.locid PARTITION(year='2002'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2001'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2002'); + +describe formatted loc_orc_1d.zip PARTITION(year='2001'); + +describe formatted loc_orc_1d.zip PARTITION(year='2002'); + +explain extended select state,locid,cnt,zip from loc_orc_1d; + +analyze table loc_orc_1d partition(year='2000') compute statistics for columns 
state,locid,cnt,zip; + +analyze table loc_orc_1d partition(year='2003') compute statistics for columns state,locid,cnt,zip; + +describe formatted loc_orc_1d.state PARTITION(year='2000'); + +describe formatted loc_orc_1d.state PARTITION(year='2003'); + +describe formatted loc_orc_1d.locid PARTITION(year='2000'); + +describe formatted loc_orc_1d.locid PARTITION(year='2003'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2000'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2003'); + +describe formatted loc_orc_1d.zip PARTITION(year='2000'); + +describe formatted loc_orc_1d.zip PARTITION(year='2003'); + +explain extended select state,locid,cnt,zip from loc_orc_1d; + +drop table if exists loc_orc_2d; + +create table loc_orc_2d ( + state string, + locid int, + cnt decimal +) partitioned by(zip int, year string) stored as orc; + +insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc; + +analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid,cnt; + +analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid,cnt; + +describe formatted loc_orc_2d.state partition(zip=94086, year='2001'); + +describe formatted loc_orc_2d.state partition(zip=94087, year='2002'); + +describe formatted loc_orc_2d.locid partition(zip=94086, year='2001'); + +describe formatted loc_orc_2d.locid partition(zip=94087, year='2002'); + +describe formatted loc_orc_2d.cnt partition(zip=94086, year='2001'); + +describe formatted loc_orc_2d.cnt partition(zip=94087, year='2002'); + +explain extended select state,locid,cnt,zip from loc_orc_2d; diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index 0f6b15d..3614d33 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -1,4 +1,8 @@ -PREHOOK: query: create table if not exists ext_loc ( +PREHOOK: query: drop table if exists ext_loc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists ext_loc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ext_loc ( state string, locid int, zip int, @@ -7,7 +11,7 @@ PREHOOK: query: create table if not exists ext_loc ( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@ext_loc -POSTHOOK: query: create table if not exists ext_loc ( +POSTHOOK: query: create table ext_loc ( state string, locid int, zip int, @@ -24,7 +28,11 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_full POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@ext_loc -PREHOOK: query: create table if not exists loc_orc_1d ( +PREHOOK: query: drop table if exists loc_orc_1d +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists loc_orc_1d +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table loc_orc_1d ( state string, locid int, zip int @@ -32,7 +40,7 @@ PREHOOK: query: create table if not exists loc_orc_1d ( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@loc_orc_1d -POSTHOOK: query: create table if not exists loc_orc_1d ( +POSTHOOK: query: create table loc_orc_1d ( state string, locid int, zip int @@ -345,14 +353,18 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE ListSink -PREHOOK: query: create table if not exists loc_orc_2d ( +PREHOOK: query: 
drop table if exists loc_orc_2d +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists loc_orc_2d +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table loc_orc_2d ( state string, locid int ) partitioned by(zip int, year string) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@loc_orc_2d -POSTHOOK: query: create table if not exists loc_orc_2d ( +POSTHOOK: query: create table loc_orc_2d ( state string, locid int ) partitioned by(zip int, year string) stored as orc diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index 1fdeb90..6c975b5 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -1,4 +1,8 @@ -PREHOOK: query: create table if not exists ext_loc ( +PREHOOK: query: drop table if exists ext_loc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists ext_loc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ext_loc ( state string, locid int, zip int, @@ -7,7 +11,7 @@ PREHOOK: query: create table if not exists ext_loc ( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@ext_loc -POSTHOOK: query: create table if not exists ext_loc ( +POSTHOOK: query: create table ext_loc ( state string, locid int, zip int, @@ -24,7 +28,11 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_part POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@ext_loc -PREHOOK: query: create table if not exists loc_orc_1d ( +PREHOOK: query: drop table if exists loc_orc_1d +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists loc_orc_1d +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table loc_orc_1d ( state string, locid int, zip int @@ -32,7 +40,7 @@ PREHOOK: query: create table if not exists loc_orc_1d ( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@loc_orc_1d -POSTHOOK: query: create table if not exists loc_orc_1d ( +POSTHOOK: query: create table loc_orc_1d ( state string, locid int, zip int @@ -981,14 +989,18 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL ListSink -PREHOOK: query: create table if not exists loc_orc_2d ( +PREHOOK: query: drop table if exists loc_orc_2d +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists loc_orc_2d +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table loc_orc_2d ( state string, locid int ) partitioned by(zip int, year string) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@loc_orc_2d -POSTHOOK: query: create table if not exists loc_orc_2d ( +POSTHOOK: query: create table loc_orc_2d ( state string, locid int ) partitioned by(zip int, year string) stored as orc diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out new file mode 100644 index 0000000..81ac963 --- /dev/null +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out @@ -0,0 +1,1385 @@ +PREHOOK: query: drop table if exists ext_loc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists ext_loc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ext_loc ( + state string, + locid double, + cnt decimal, + zip int, + year 
string +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ext_loc +POSTHOOK: query: create table ext_loc ( + state string, + locid double, + cnt decimal, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ext_loc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial_ndv.txt' OVERWRITE INTO TABLE ext_loc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ext_loc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial_ndv.txt' OVERWRITE INTO TABLE ext_loc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ext_loc +PREHOOK: query: drop table if exists loc_orc_1d +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists loc_orc_1d +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table loc_orc_1d ( + state string, + locid double, + cnt decimal, + zip int +) partitioned by(year string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: create table loc_orc_1d ( + state string, + locid double, + cnt decimal, + zip int +) partitioned by(year string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_1d +PREHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +PREHOOK: type: QUERY +PREHOOK: Input: default@ext_loc +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ext_loc +POSTHOOK: Output: default@loc_orc_1d@year=2000 +POSTHOOK: Output: default@loc_orc_1d@year=2001 +POSTHOOK: Output: default@loc_orc_1d@year=2002 +POSTHOOK: Output: default@loc_orc_1d@year=2003 +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] 
+POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +PREHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2002 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2002 +#### A masked pattern was here #### +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 0.75 2 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 6 3.0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 4.0 0 5 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 5.0 0 6 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: 
Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 2000 0 5 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 910 0 4 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 4 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 4 from deserializer +PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_SELEXPR + TOK_TABLE_OR_COL + cnt + TOK_SELEXPR + TOK_TABLE_OR_COL + zip + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 416 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 521 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 832 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 553 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 1266 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 571 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + 
name default.loc_orc_1d + numFiles 1 + numRows 8 + partition_columns year + partition_columns.types string + rawDataSize 1672 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 593 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Processor Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 20 Data size: 4186 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Select Operator + expressions: state (type: string), locid (type: double), cnt (type: decimal(10,0)), zip (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL + ListSink + +PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2003 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2003 +#### A masked pattern was here #### +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2000') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2000') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 2 0.5 1 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 4 1.25 4 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2000') +PREHOOK: type: DESCTABLE 
+PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2000') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 2.0 0 2 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 31.0 0 6 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2000') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2000') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 1010 0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 2000 0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2000') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2000') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 94086 94087 0 2 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 4 from deserializer +PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_SELEXPR + TOK_TABLE_OR_COL + cnt + TOK_SELEXPR + TOK_TABLE_OR_COL + zip + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 
+ numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 416 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 521 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 832 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 553 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 1266 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 571 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + 
+              columns.types string:double:decimal(10,0):int
+#### A masked pattern was here ####
+              name default.loc_orc_1d
+              partition_columns year
+              partition_columns.types string
+              serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_1d
+          name: default.loc_orc_1d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2003
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt,zip
+              columns.comments 
+              columns.types string:double:decimal(10,0):int
+#### A masked pattern was here ####
+              name default.loc_orc_1d
+              numFiles 1
+              numRows 8
+              partition_columns year
+              partition_columns.types string
+              rawDataSize 1672
+              serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 593
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt,zip
+              columns.comments 
+              columns.types string:double:decimal(10,0):int
+#### A masked pattern was here ####
+              name default.loc_orc_1d
+              partition_columns year
+              partition_columns.types string
+              serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_1d
+          name: default.loc_orc_1d
+      Processor Tree:
+        TableScan
+          alias: loc_orc_1d
+          Statistics: Num rows: 20 Data size: 4186 Basic stats: COMPLETE Column stats: COMPLETE
+          GatherStats: false
+          Select Operator
+            expressions: state (type: string), locid (type: double), cnt (type: decimal(10,0)), zip (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 20 Data size: 4220 Basic stats: COMPLETE Column stats: COMPLETE
+            ListSink
+
+PREHOOK: query: drop table if exists loc_orc_2d
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists loc_orc_2d
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table loc_orc_2d (
+  state string,
+  locid int,
+  cnt decimal
+) partitioned by(zip int, year string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@loc_orc_2d
+POSTHOOK: query: create table loc_orc_2d (
+  state string,
+  locid int,
+  cnt decimal
+) partitioned by(zip int, year string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@loc_orc_2d
+PREHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ext_loc
+PREHOOK: Output: default@loc_orc_2d
+POSTHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ext_loc
+POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2001
+POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2002
+POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2003
+POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2000
+POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2001
+POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2002
+POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2003
+POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2000
+POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2001
+POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2002
+POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2003
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ]
+POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ]
+PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid,cnt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@loc_orc_2d
+PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2001
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid,cnt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@loc_orc_2d
+POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001
+#### A masked pattern was here ####
+PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid,cnt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@loc_orc_2d
+PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2002
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid,cnt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@loc_orc_2d
+POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2002
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted loc_orc_2d.state partition(zip=94086, year='2001')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@loc_orc_2d
+POSTHOOK: query: describe formatted loc_orc_2d.state partition(zip=94086, year='2001')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@loc_orc_2d
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+state	string	 	 	0	2	0.5	1	 	 	from deserializer
+PREHOOK: query: describe formatted loc_orc_2d.state partition(zip=94087, year='2002')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@loc_orc_2d
+POSTHOOK: query: describe formatted loc_orc_2d.state partition(zip=94087, year='2002')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@loc_orc_2d
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+state	string	 	 	0	4	3.0	3	 	 	from deserializer
+PREHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94086, year='2001')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@loc_orc_2d
+POSTHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94086, year='2001')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@loc_orc_2d
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+locid	int	2	3	0	2	 	 	 	 	from deserializer
+PREHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94087, year='2002')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@loc_orc_2d
+POSTHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94087, year='2002')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@loc_orc_2d
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+locid	int	1	5	0	3	 	 	 	 	from deserializer
+PREHOOK: query: describe formatted loc_orc_2d.cnt partition(zip=94086, year='2001')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@loc_orc_2d
+POSTHOOK: query: describe formatted loc_orc_2d.cnt partition(zip=94086, year='2001')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@loc_orc_2d
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+cnt	decimal(10,0)	1000	2000	0	2	 	 	 	 	from deserializer
+PREHOOK: query: describe formatted loc_orc_2d.cnt partition(zip=94087, year='2002')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@loc_orc_2d
+POSTHOOK: query: describe formatted loc_orc_2d.cnt partition(zip=94087, year='2002')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@loc_orc_2d
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+cnt	decimal(10,0)	10	100	0	2	 	 	 	 	from deserializer
+PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            loc_orc_2d
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               state
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               locid
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               cnt
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               zip
+
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Partition Description:
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2001
+              zip 43201
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 1
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 202
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 379
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2002
+              zip 43201
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 2
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 406
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 402
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2003
+              zip 43201
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 3
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 603
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 417
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2000
+              zip 94086
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 1
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 201
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 376
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2001
+              zip 94086
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 2
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 400
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 386
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2002
+              zip 94086
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 1
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 203
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 380
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2003
+              zip 94086
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 2
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 404
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 403
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2000
+              zip 94087
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 1
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 200
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 359
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2001
+              zip 94087
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 1
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 200
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 354
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2002
+              zip 94087
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 3
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 609
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 405
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            partition values:
+              year 2003
+              zip 94087
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              numFiles 1
+              numRows 3
+              partition_columns zip/year
+              partition_columns.types int:string
+              rawDataSize 600
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 405
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,cnt
+              columns.comments 
+              columns.types string:int:decimal(10,0)
+#### A masked pattern was here ####
+              name default.loc_orc_2d
+              partition_columns zip/year
+              partition_columns.types int:string
+              serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc_2d
+          name: default.loc_orc_2d
+      Processor Tree:
+        TableScan
+          alias: loc_orc_2d
+          Statistics: Num rows: 20 Data size: 4028 Basic stats: COMPLETE Column stats: PARTIAL
+          GatherStats: false
+          Select Operator
+            expressions: state (type: string), locid (type: int), cnt (type: decimal(10,0)), zip (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 20 Data size: 4160 Basic stats: COMPLETE Column stats: PARTIAL
+            ListSink
+
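Reviewer note: the loc_orc_2d plan above ends with "Column stats: PARTIAL" because only two of the eleven partitions (zip=94086/year=2001 and zip=94087/year=2002) were analyzed. As a minimal sketch of the kind of estimate such partial stats allow -- illustrative only, not the patch's implementation; PartialNdvSketch and averagePartitionNdv are hypothetical names, not Hive APIs -- the per-partition distinct counts reported above for locid (2 and 3) can be averaged before any table-level scaling:

    // Illustrative sketch only (not the patch's code). Averages the NDVs of
    // the partitions that actually carry column statistics, e.g.
    // loc_orc_2d.locid above: NDV 2 for (zip=94086, year=2001) and
    // NDV 3 for (zip=94087, year=2002).
    public class PartialNdvSketch {

        // Hypothetical helper: arithmetic mean of per-partition distinct counts.
        static double averagePartitionNdv(long[] partitionNdvs) {
            long sum = 0;
            for (long ndv : partitionNdvs) {
                sum += ndv;
            }
            return (double) sum / partitionNdvs.length;
        }

        public static void main(String[] args) {
            long[] analyzedNdvs = {2L, 3L}; // from the describe output above
            System.out.println("avg NDV per analyzed partition = "
                + averagePartitionNdv(analyzedNdvs)); // prints 2.5
        }
    }

Averaging (rather than summing) per-partition NDVs hedges against double-counting values that repeat across partitions; any scaling of the average up to all eleven partitions would still be an estimate, which is why the planner labels the resulting column stats PARTIAL rather than COMPLETE.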