diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 2723dad..f25ae5c 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1465,7 +1465,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal // Statistics HIVESTATSAUTOGATHER("hive.stats.autogather", true, - "A flag to gather statistics automatically during the INSERT OVERWRITE command."), + "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), + HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false, + "A flag to gather column statistics automatically."), HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", "fs"), "The storage that stores temporary Hive statistics. In filesystem based statistics collection ('fs'), \n" + "each task writes statistics it has collected in a file on the filesystem, which will be aggregated \n" + diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml index cbb5546..fda7ee0 100644 --- a/data/conf/hive-site.xml +++ b/data/conf/hive-site.xml @@ -297,5 +297,9 @@ false + + hive.stats.column.autogather + true + diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index f991d49..ad353cf 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -74,6 +74,7 @@ minitez.query.files.shared=acid_globallimit.q,\ alter_merge_2_orc.q,\ alter_merge_orc.q,\ alter_merge_stats_orc.q,\ + autoColumnStats_2.q,\ auto_join0.q,\ auto_join1.q,\ bucket2.q,\ diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index f425c6e..eed8c6c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -4284,7 +4284,9 @@ public ColumnStatistics get_table_column_statistics(String dbName, String tableN try { statsObj = getMS().getTableColumnStatistics( dbName, tableName, Lists.newArrayList(colName)); - assert statsObj.getStatsObjSize() <= 1; + if (statsObj != null) { + assert statsObj.getStatsObjSize() <= 1; + } return statsObj; } finally { endFunction("get_column_statistics_by_table: ", statsObj != null, null, tableName); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index b5c4d1d..8694302 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.metastore.api.CheckLockRequest; import org.apache.hadoop.hive.metastore.api.ClearFileMetadataRequest; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.CommitTxnRequest; import org.apache.hadoop.hive.metastore.api.CompactionRequest; @@ -134,6 +135,7 @@ import org.slf4j.LoggerFactory; import javax.security.auth.login.LoginException; + import java.io.IOException; import java.lang.reflect.Constructor; import 
java.lang.reflect.InvocationHandler; @@ -1529,8 +1531,18 @@ public Index getIndex(String dbName, String tblName, String indexName) /** {@inheritDoc} */ @Override public boolean updateTableColumnStatistics(ColumnStatistics statsObj) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + InvalidInputException { + return updateTableColumnStatistics(statsObj, 0); + } + + @Override + public boolean updateTableColumnStatistics(ColumnStatistics statsObj, int numBitVector) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException{ + if (numBitVector > 0) { + MetaStoreUtils.mergeTableColStats(client, statsObj, numBitVector); + } return client.update_table_column_statistics(statsObj); } @@ -1547,6 +1559,16 @@ public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException{ + return setPartitionColumnStatistics(request, 0); + } + + @Override + public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request, int numBitVector) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + InvalidInputException { + if (numBitVector > 0) { + MetaStoreUtils.mergePartColStats(client, request, numBitVector); + } return client.set_aggr_stats_for(request); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index cb092d1..93a91d6 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -949,6 +949,7 @@ boolean dropIndex(String db_name, String tbl_name, /** * Write table level column statistics to persistent store * @param statsObj + * @param numBitVector * @return boolean indicating the status of the operation * @throws NoSuchObjectException * @throws InvalidObjectException @@ -957,9 +958,13 @@ boolean dropIndex(String db_name, String tbl_name, * @throws InvalidInputException */ + boolean updateTableColumnStatistics(ColumnStatistics statsObj, int numBitVector) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + InvalidInputException; + boolean updateTableColumnStatistics(ColumnStatistics statsObj) - throws NoSuchObjectException, InvalidObjectException, MetaException, TException, - InvalidInputException; + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + InvalidInputException; /** * Write partition level column statistics to persistent store @@ -1505,6 +1510,8 @@ public AggrStats getAggrColStatsFor(String dbName, String tblName, boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException; + boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request, int numBitVector) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException; + /** * Flush any catalog objects held by the metastore implementation. Note that this does not * flush statistics objects. This should be called at the beginning of each query. 
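
Reviewer note: a minimal caller sketch (not part of the patch) showing how the two new IMetaStoreClient overloads above are intended to be used. Only the patched updateTableColumnStatistics signatures are taken from the diff; the helper class and method names are hypothetical. Passing numBitVector = 0 keeps the old overwrite semantics, while a positive value routes through the merge path added in MetaStoreUtils below (the INSERT INTO case in TaskCompiler).

import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;

// Hypothetical helper illustrating the intended use of the patched overloads.
public class ColumnStatsPersistExample {
  public static boolean persistTableStats(IMetaStoreClient msc, ColumnStatistics stats,
      int numBitVector) throws Exception {
    if (numBitVector > 0) {
      // INSERT INTO case: merge incoming stats with the stats already stored, then write.
      return msc.updateTableColumnStatistics(stats, numBitVector);
    }
    // INSERT OVERWRITE / ANALYZE case: plain overwrite, equivalent to the old one-arg call.
    return msc.updateTableColumnStatistics(stats);
  }
}
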
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 866e1c3..0984a9e 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -29,6 +29,7 @@ import java.net.URLClassLoader; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -55,16 +56,26 @@ import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregator; +import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregatorFactory; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; @@ -82,6 +93,7 @@ import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hive.common.util.ReflectionUtil; +import org.apache.thrift.TException; import javax.annotation.Nullable; @@ -1764,4 +1776,46 @@ public static String encodeTableName(String name) { return ret; } + public static void mergeTableColStats(Iface client, ColumnStatistics cs, int numBitVector) throws NoSuchObjectException, MetaException, InvalidInputException, InvalidObjectException, TException { + ColumnStatistics csCopy = cs.deepCopy(); + ColumnStatisticsDesc statsDesc = csCopy.getStatsDesc(); + List list = new ArrayList<>(); + for (ColumnStatisticsObj statsObjNew : csCopy.getStatsObj()) { + String col_name = statsObjNew.getColName(); + ColumnStatistics csOld = client.get_table_column_statistics(statsDesc.getDbName(), statsDesc.getTableName(), + col_name); + if (csOld != null && csOld.getStatsObjIterator() != null) { + ColumnStatisticsObj statsObjOld = csOld.getStatsObjIterator().next(); + ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator( + statsObjNew.getStatsData().getSetField(), numBitVector); + aggregator.aggregate(statsObjNew, statsObjOld); + } + list.add(statsObjNew); + } + cs.setStatsObj(list); + } + + public static void 
mergePartColStats(Iface client, SetPartitionsStatsRequest request, + int numBitVector) throws NoSuchObjectException, MetaException, InvalidInputException, + InvalidObjectException, TException { + for (ColumnStatistics cs : request.getColStats()) { + ColumnStatistics csCopy = cs.deepCopy(); + ColumnStatisticsDesc statsDesc = csCopy.getStatsDesc(); + List list = new ArrayList<>(); + for (ColumnStatisticsObj statsObjNew : csCopy.getStatsObj()) { + String col_name = statsObjNew.getColName(); + ColumnStatistics csOld = client.get_partition_column_statistics(statsDesc.getDbName(), + statsDesc.getTableName(), statsDesc.getPartName(), col_name); + if (csOld != null && csOld.getStatsObjIterator() != null) { + ColumnStatisticsObj statsObjOld = csOld.getStatsObjIterator().next(); + ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator( + statsObjNew.getStatsData().getSetField(), numBitVector); + aggregator.aggregate(statsObjNew, statsObjOld); + } + list.add(statsObjNew); + } + cs.setStatsObj(list); + } + } + } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java index 694e53b..f48f96c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java @@ -23,6 +23,6 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; public abstract class ColumnStatsAggregator { - NumDistinctValueEstimator ndvEstimator = null; + int numBitVectors = 0; public abstract void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java index 8eb127b..56ac53c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java @@ -60,7 +60,7 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, int n throw new RuntimeException("Woh, bad. 
Unknown stats type " + type.toString()); } if (numBitVectors > 0) { - agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + agg.numBitVectors = numBitVectors; } return agg; } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java index 50f4325..5b80e1d 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java @@ -39,9 +39,14 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj .getHighValue() : newData.getHighValue(); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (numBitVectors == 0 || !aggregateData.isSetBitVectors() + || aggregateData.getBitVectors().length() == 0 || !newData.isSetBitVectors() + || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { + NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), ndvEstimator.getnumBitVectors())); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java index d945ec2..b879aec 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java @@ -32,9 +32,14 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (numBitVectors == 0 || !aggregateData.isSetBitVectors() + || aggregateData.getBitVectors().length() == 0 || !newData.isSetBitVectors() + || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { + NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), ndvEstimator.getnumBitVectors())); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java index 068dd00..297c1be 100644 --- 
a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java @@ -32,9 +32,14 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (numBitVectors == 0 || !aggregateData.isSetBitVectors() + || aggregateData.getBitVectors().length() == 0 || !newData.isSetBitVectors() + || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { + NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), ndvEstimator.getnumBitVectors())); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java index aeb6c39..07e297b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java @@ -32,9 +32,14 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + if (numBitVectors == 0 || !aggregateData.isSetBitVectors() + || aggregateData.getBitVectors().length() == 0 || !newData.isSetBitVectors() + || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { + NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), ndvEstimator.getnumBitVectors())); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index 9059928..685899a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -397,7 +397,7 @@ private int persistPartitionStats(Hive db) throws HiveException, MetaException, // Construct a column statistics object from the result List colStats = constructColumnStatsFromPackedRows(db); // Persist the column statistics object to the metastore - db.setPartitionColumnStatistics(new 
SetPartitionsStatsRequest(colStats)); + db.setPartitionColumnStatistics(new SetPartitionsStatsRequest(colStats), work.getColStats().getNumBitVector()); return 0; } @@ -406,7 +406,7 @@ private int persistTableStats(Hive db) throws HiveException, MetaException, IOEx // Construct a column statistics object from the result ColumnStatistics colStats = constructColumnStatsFromPackedRows(db).get(0); // Persist the column statistics object to the metastore - db.updateTableColumnStatistics(colStats); + db.updateTableColumnStatistics(colStats, work.getColStats().getNumBitVector()); return 0; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index ad17096..ed69618 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -3231,7 +3231,16 @@ private String getUserName() { public boolean updateTableColumnStatistics(ColumnStatistics statsObj) throws HiveException { try { - return getMSC().updateTableColumnStatistics(statsObj); + return getMSC().updateTableColumnStatistics(statsObj, 0); + } catch (Exception e) { + LOG.debug(StringUtils.stringifyException(e)); + throw new HiveException(e); + } + } + + public boolean updateTableColumnStatistics(ColumnStatistics statsObj, int numBitVector) throws HiveException { + try { + return getMSC().updateTableColumnStatistics(statsObj, numBitVector); } catch (Exception e) { LOG.debug(StringUtils.stringifyException(e)); throw new HiveException(e); @@ -3256,6 +3265,15 @@ public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) t } } + public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request, int numBitVector) throws HiveException { + try { + return getMSC().setPartitionColumnStatistics(request, numBitVector); + } catch (Exception e) { + LOG.debug(StringUtils.stringifyException(e)); + throw new HiveException(e); + } + } + public List getTableColumnStatistics( String dbName, String tableName, List colNames) throws HiveException { try { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index db2b674..569784b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -325,7 +325,7 @@ public PrincipalPrivilegeSet get_privilege_set(HiveObjectRef hiveObject, /** {@inheritDoc} */ @Override - public boolean updateTableColumnStatistics(ColumnStatistics statsObj) + public boolean updateTableColumnStatistics(ColumnStatistics statsObj, int numBitVector) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException { String dbName = statsObj.getStatsDesc().getDbName().toLowerCase(); @@ -333,7 +333,7 @@ public boolean updateTableColumnStatistics(ColumnStatistics statsObj) if (getTempTable(dbName, tableName) != null) { return updateTempTableColumnStats(dbName, tableName, statsObj); } - return super.updateTableColumnStatistics(statsObj); + return super.updateTableColumnStatistics(statsObj, numBitVector); } /** {@inheritDoc} */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index f56cd96..163c574 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -232,6 +232,11 @@ public void initialize(HiveConf hiveConf) { } } + public void initialize(Transform transform) { + transformations = new ArrayList(); + transformations.add(transform); + } + /** * Invoke all the transformations one-by-one, and alter the query plan. * diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index c36aa9d..2da4bc4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -679,7 +679,7 @@ public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... tokens) { } } - private static void replaceASTChild(ASTNode child, ASTNode newChild) { + static void replaceASTChild(ASTNode child, ASTNode newChild) { ASTNode parent = (ASTNode) child.parent; int childIndex = child.childIndex; parent.deleteChild(childIndex); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java new file mode 100644 index 0000000..6d3b620 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -0,0 +1,306 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; +import org.apache.hadoop.hive.ql.optimizer.Optimizer; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.LoadFileDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; + +/** + * ColumnStatsAutoGatherContext: This is passed to the compiler when set + * hive.stats.autogather=true during the INSERT OVERWRITE command. 
+ * + **/ + +public class ColumnStatsAutoGatherContext { + + public static final String colStats = "colStats"; + public AnalyzeRewriteContext analyzeRewrite; + private final List loadFileWork = new ArrayList<>(); + private final SemanticAnalyzer sa; + private final HiveConf conf; + private final Operator op; + private final String tableName; + private final ASTNode tableAST; + private final List colNames = new ArrayList<>(); + private final List selectedPos = new ArrayList<>(); + private final List partitionColNames = new ArrayList<>(); + private boolean isPartitionedTable; + private boolean isInsertInto; + + public ColumnStatsAutoGatherContext( + SemanticAnalyzer sa, HiveConf conf, + Operator op, String tableName, ASTNode ast, boolean isInsertInto) throws SemanticException { + super(); + this.sa = sa; + this.conf = conf; + this.op = op; + this.tableName = tableName; + this.tableAST = ast; + this.isInsertInto = isInsertInto; + Hive db; + try { + db = Hive.get(conf); + Table tbl = db.getTable(tableName); + for (FieldSchema fs : tbl.getCols()) { + colNames.add(fs.getName()); + } + for (FieldSchema fs : tbl.getPartCols()) { + partitionColNames.add(fs.getName()); + } + String selectedCols = tbl.getProperty(colStats); + if (selectedCols != null) { + String[] selected = selectedCols.split(","); + if (selected != null) { + for (String col : selected) { + int index = this.colNames.indexOf(col.trim()); + if (index == -1) { + throw new SemanticException("In insert overwrite table " + tableName + + " statement, can not find column " + col + " in colStats table properties from " + + Arrays.toString(colNames.toArray())); + } else { + selectedPos.add(index); + } + } + } + } + isPartitionedTable = (partitionColNames.size() != 0); + //else means that colStats was not yet configured. Assume all the columns are selected. + } catch (HiveException e) { + throw new SemanticException(e); + } + } + + public List getLoadFileWork() { + return loadFileWork; + } + + public AnalyzeRewriteContext getAnalyzeRewrite() { + return analyzeRewrite; + } + + public void setAnalyzeRewrite(AnalyzeRewriteContext analyzeRewrite) { + this.analyzeRewrite = analyzeRewrite; + } + + public void insertAnalyzePipeline() throws SemanticException{ + // 1. Generate the statement of analyze table [tablename] compute statistics for columns + String analyzeCommand = "analyze table " + tableName + + " compute statistics for columns " + genCols(); + + // 2. + // In non-partitioned table case, it will generate TS-SEL-GBY-RS-GBY-SEL-FS operator + // In static-partitioned table case, it will generate TS-FIL(partitionKey)-SEL-GBY(partitionKey)-RS-GBY-SEL-FS operator + // In dynamic-partitioned table case, it will generate TS-SEL-GBY(partitionKey)-RS-GBY-SEL-FS operator + Operator selOp = null; + try { + selOp = genSelOpForAnalyze(analyzeCommand); + } catch (IOException | ParseException e) { + throw new SemanticException(e); + } + + // 3. attach this SEL to the operator right before FS + op.getChildOperators().add(selOp); + selOp.getParentOperators().clear(); + selOp.getParentOperators().add(op); + + // 4. 
address the colExp, colList, etc for the SEL + replaceSelectOperatorProcess((SelectOperator)selOp, op); + } + + private String genCols() { + if (selectedPos.size() == 0) { + return ""; + } else { + StringBuilder builder = new StringBuilder(); + for (int pos : selectedPos) { + builder.append("," + colNames.get(pos)); + } + return builder.substring(1); + } + } + + @SuppressWarnings("rawtypes") + private Operator genSelOpForAnalyze(String analyzeCommand) throws IOException, ParseException, SemanticException{ + //0. initialization + Context ctx = new Context(conf); + ParseDriver pd = new ParseDriver(); + ASTNode tree = pd.parse(analyzeCommand, ctx); + tree = ParseUtils.findRootNonNullToken(tree); + + ASTNode oldTable = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(tree, + HiveParser.TOK_ANALYZE, HiveParser.TOK_TAB); + if (oldTable == null) { + throw new SemanticException("Cannot find insert table in " + tree.dump()); + } + ASTNode newDest = null; + if (!isInsertInto) { + newDest = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(tableAST, + HiveParser.TOK_DESTINATION, HiveParser.TOK_TAB); + } else { + newDest = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(tableAST, + HiveParser.TOK_INSERT_INTO, HiveParser.TOK_TAB); + } + if (newDest == null) { + throw new SemanticException("Cannot find insert table in " + tableAST.dump()); + } + CalcitePlanner.replaceASTChild(oldTable, newDest); + + //1. get the ColumnStatsSemanticAnalyzer + BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(conf, tree); + ColumnStatsSemanticAnalyzer colSem = (ColumnStatsSemanticAnalyzer) baseSem; + + //2. get the rewritten AST + ASTNode ast = colSem.getRewriteASTOnly(tree, this); + baseSem = SemanticAnalyzerFactory.get(conf, ast); + SemanticAnalyzer sem = (SemanticAnalyzer) baseSem; + QB qb = new QB(null, null, false); + ASTNode child = ast; + ParseContext subPCtx = ((SemanticAnalyzer) sem).getParseContext(); + subPCtx.setContext(ctx); + ((SemanticAnalyzer) sem).initParseCtx(subPCtx); + sem.doPhase1(child, qb, sem.initPhase1Ctx(), null); + sem.getMetaData(qb); + Operator operator = sem.genPlan(qb); + // If it contains static partition (SP, or SP mix DP), we need to do constant propagation. + if (isPartitionedTable) { + Optimizer optm = new Optimizer(); + optm.setPctx(subPCtx); + optm.initialize(new ConstantPropagate()); + subPCtx = optm.optimize(); + } + + //3. populate the load file work so that ColumnStatsTask can work + loadFileWork.addAll(sem.getLoadFileWork()); + + //4. because there is only one TS for analyze statement, we can get it. + operator = sem.topOps.values().iterator().next(); + + //5. 
get the first SEL after TS + while(!(operator instanceof SelectOperator)){ + operator = operator.getChildOperators().get(0); + } + return operator; + } + + /** + * @param operator : the select operator in the analyze statement + * @param input : the operator right before FS in the insert overwrite statement + * @throws SemanticException + */ + private void replaceSelectOperatorProcess(SelectOperator operator, Operator input) + throws SemanticException { + RowSchema selRS = operator.getSchema(); + ArrayList signature = new ArrayList<>(); + OpParseContext inputCtx = sa.opParseCtx.get(input); + RowResolver inputRR = inputCtx.getRowResolver(); + ArrayList columns = inputRR.getColumnInfos(); + ArrayList colList = new ArrayList(); + ArrayList columnNames = new ArrayList(); + Map columnExprMap = + new HashMap(); + // the column positions in the operator should be like this + // <----non-partition columns---->|<--static partition columns-->|<--dynamic partition columns--> + // ExprNodeColumnDesc | ExprNodeConstantDesc | ExprNodeColumnDesc + // from input | generate itself | from input + // | + + // there is no selected columns, select all non-partition columns + if (selectedPos.size() == 0) { + for (int i = 0; i < colNames.size(); i++) { + ColumnInfo col = columns.get(i); + colList.add(new ExprNodeColumnDesc(col)); + String internalName = selRS.getColumnNames().get(i); + columnNames.add(internalName); + columnExprMap.put(internalName, new ExprNodeColumnDesc(col)); + signature.add(selRS.getSignature().get(i)); + } + // there is selected columns + } else { + for (int i = 0; i < selectedPos.size(); i++) { + int index = selectedPos.get(i); + ColumnInfo col = columns.get(index); + colList.add(new ExprNodeColumnDesc(col)); + String internalName = selRS.getColumnNames().get(index); + columnNames.add(internalName); + columnExprMap.put(internalName, new ExprNodeColumnDesc(col)); + signature.add(selRS.getSignature().get(index)); + } + } + // if there is any partition column (in static partition or dynamic + // partition or mixed case) + for (int i = 0; i < partitionColNames.size(); i++) { + ExprNodeDesc exprNodeDesc = operator.getConf().getColList().get(colNames.size() + i); + // static partition columns + if (exprNodeDesc instanceof ExprNodeConstantDesc) { + colList.add(exprNodeDesc); + String internalName = selRS.getColumnNames().get(colNames.size() + i); + columnNames.add(internalName); + columnExprMap.put(internalName, operator.getColumnExprMap().get(internalName)); + signature.add(selRS.getSignature().get(colNames.size() + i)); + } + // dynamic partition columns + else { + int index = columns.size() - (partitionColNames.size() - i); + ColumnInfo col = columns.get(index); + colList.add(new ExprNodeColumnDesc(col)); + String internalName = selRS.getColumnNames().get(colNames.size() + i); + columnNames.add(internalName); + columnExprMap.put(internalName, new ExprNodeColumnDesc(col)); + signature.add(selRS.getSignature().get(colNames.size() + i)); + } + } + operator.setConf(new SelectDesc(colList, columnNames, true)); + operator.setColumnExprMap(columnExprMap); + selRS.setSignature(signature); + operator.setSchema(selRS); + } + + public String getTableName() { + return tableName; + } + + public boolean isInsertInto() { + return isInsertInto; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index bb1bbad..7764983 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -111,11 +111,18 @@ private void handlePartialPartitionSpec(Map partSpec) throws partValsSpecified += partSpec.get(partKey) == null ? 0 : 1; } try { - if ((partValsSpecified == tbl.getPartitionKeys().size()) && (db.getPartition(tbl, partSpec, false, null, false) == null)) { - throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PARTITION.getMsg() + " : " + partSpec); + // for static partition, it may not exist when HIVESTATSCOLAUTOGATHER is + // set to true + if (!conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER)) { + if ((partValsSpecified == tbl.getPartitionKeys().size()) + && (db.getPartition(tbl, partSpec, false, null, false) == null)) { + throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PARTITION.getMsg() + + " : " + partSpec); + } } } catch (HiveException he) { - throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PARTITION.getMsg() + " : " + partSpec); + throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PARTITION.getMsg() + " : " + + partSpec); } // User might have only specified partial list of partition keys, in which case add other partition keys in partSpec @@ -379,4 +386,54 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { analyzeInternal(originalTree); } } + + /** + * @param ast + * is the original analyze ast + * @param qb + * is the qb that calls this function + * @param sem + * is the semantic analyzer that calls this function + * @return + * @throws SemanticException + */ + public ASTNode getRewriteASTOnly(ASTNode ast, ColumnStatsAutoGatherContext context) + throws SemanticException { + tbl = AnalyzeCommandUtils.getTable(ast, this); + colNames = getColumnName(ast); + // Save away the original AST + originalTree = ast; + boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast); + Map partSpec = null; + checkForPartitionColumns(colNames, + Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); + validateSpecifiedColumnNames(colNames); + if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) { + isPartitionStats = true; + } + + if (isPartitionStats) { + isTableLevel = false; + partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf); + handlePartialPartitionSpec(partSpec); + } else { + isTableLevel = true; + } + colType = getColumnTypes(colNames); + int numBitVectors = 0; + try { + numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); + } catch (Exception e) { + throw new SemanticException(e.getMessage()); + } + rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats); + rewrittenTree = genRewrittenTree(rewrittenQuery); + + context.analyzeRewrite = new AnalyzeRewriteContext(); + context.analyzeRewrite.setTableName(tbl.getDbName() + "." 
+ tbl.getTableName()); + context.analyzeRewrite.setTblLvl(isTableLevel); + context.analyzeRewrite.setColName(colNames); + context.analyzeRewrite.setColType(colType); + return rewrittenTree; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 4f784d1..49ba6c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -77,6 +77,7 @@ private HashMap nameToSplitSample; private List loadTableWork; private List loadFileWork; + private List columnStatsAutoGatherContexts; private Context ctx; private HiveConf conf; private HashMap idToTableNameMap; @@ -159,6 +160,7 @@ public ParseContext( Set joinOps, Set smbMapJoinOps, List loadTableWork, List loadFileWork, + List columnStatsAutoGatherContexts, Context ctx, HashMap idToTableNameMap, int destTableId, UnionProcContext uCtx, List> listMapJoinOpsNoReducer, Map prunedPartitions, @@ -178,6 +180,7 @@ public ParseContext( this.smbMapJoinOps = smbMapJoinOps; this.loadFileWork = loadFileWork; this.loadTableWork = loadTableWork; + this.columnStatsAutoGatherContexts = columnStatsAutoGatherContexts; this.topOps = topOps; this.ctx = ctx; this.idToTableNameMap = idToTableNameMap; @@ -572,4 +575,13 @@ public boolean isNeedViewColumnAuthorization() { public void setNeedViewColumnAuthorization(boolean needViewColumnAuthorization) { this.needViewColumnAuthorization = needViewColumnAuthorization; } + + public List getColumnStatsAutoGatherContexts() { + return columnStatsAutoGatherContexts; + } + + public void setColumnStatsAutoGatherContexts( + List columnStatsAutoGatherContexts) { + this.columnStatsAutoGatherContexts = columnStatsAutoGatherContexts; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 3a226e7..383ce4e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -63,7 +63,8 @@ private final Set destCubes; private final Set destGroupingSets; private final Map destToHaving; - private final HashSet insertIntoTables; + private final Map insertIntoTables; + private final Map insertOverwriteTables; private boolean isAnalyzeCommand; // used for the analyze command (statistics) private boolean isNoScanAnalyzeCommand; // used for the analyze command (statistics) (noscan) @@ -133,7 +134,8 @@ public QBParseInfo(String alias, boolean isSubQ) { destToSortby = new HashMap(); destToOrderby = new HashMap(); destToLimit = new HashMap>(); - insertIntoTables = new HashSet(); + insertIntoTables = new HashMap(); + insertOverwriteTables = new HashMap(); destRollups = new HashSet(); destCubes = new HashSet(); destGroupingSets = new HashSet(); @@ -174,13 +176,13 @@ public void addAggregationExprsForClause(String clause, } } - public void addInsertIntoTable(String fullName) { - insertIntoTables.add(fullName.toLowerCase()); + public void addInsertIntoTable(String fullName, ASTNode ast) { + insertIntoTables.put(fullName.toLowerCase(), ast); } public boolean isInsertIntoTable(String dbName, String table) { String fullName = dbName + "." 
+ table; - return insertIntoTables.contains(fullName.toLowerCase()); + return insertIntoTables.containsKey(fullName.toLowerCase()); } /** @@ -189,7 +191,7 @@ public boolean isInsertIntoTable(String dbName, String table) { * @return */ public boolean isInsertIntoTable(String fullTableName) { - return insertIntoTables.contains(fullTableName.toLowerCase()); + return insertIntoTables.containsKey(fullTableName.toLowerCase()); } public HashMap getAggregationExprsForClause(String clause) { @@ -636,6 +638,14 @@ public boolean isPartialScanAnalyzeCommand() { public void setPartialScanAnalyzeCommand(boolean isPartialScanAnalyzeCommand) { this.isPartialScanAnalyzeCommand = isPartialScanAnalyzeCommand; } + + public Map getInsertOverwriteTables() { + return insertOverwriteTables; + } + + public Map getInsertIntoTables() { + return insertIntoTables; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 3e91e10..211345d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -250,6 +250,7 @@ protected LinkedHashMap, OpParseContext> opParseCtx; private List loadTableWork; private List loadFileWork; + private List columnStatsAutoGatherContexts; private final Map joinContext; private final Map smbMapJoinContext; private final HashMap topToTable; @@ -334,6 +335,7 @@ public SemanticAnalyzer(HiveConf conf) throws SemanticException { topOps = new LinkedHashMap(); loadTableWork = new ArrayList(); loadFileWork = new ArrayList(); + columnStatsAutoGatherContexts = new ArrayList(); opParseCtx = new LinkedHashMap, OpParseContext>(); joinContext = new HashMap(); smbMapJoinContext = new HashMap(); @@ -368,6 +370,7 @@ protected void reset(boolean clearPartsCache) { } loadTableWork.clear(); loadFileWork.clear(); + columnStatsAutoGatherContexts.clear(); topOps.clear(); destTableId = 1; idToTableNameMap.clear(); @@ -425,7 +428,7 @@ public ParseContext getParseContext() { return new ParseContext(conf, opToPartPruner, opToPartList, topOps, new HashSet(joinContext.keySet()), new HashSet(smbMapJoinContext.keySet()), - loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, + loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, @@ -610,7 +613,7 @@ ASTNode getAST() { return this.ast; } - protected void setAST(ASTNode newAST) { + public void setAST(ASTNode newAST) { this.ast = newAST; } @@ -1361,18 +1364,25 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan case HiveParser.TOK_INSERT_INTO: String currentDatabase = SessionState.get().getCurrentDatabase(); String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase); - qbp.addInsertIntoTable(tab_name); + qbp.addInsertIntoTable(tab_name, ast); case HiveParser.TOK_DESTINATION: ctx_1.dest = "insclause-" + ctx_1.nextNum; ctx_1.nextNum++; boolean isTmpFileDest = false; if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) { - ASTNode ch = (ASTNode)ast.getChild(0); - if (ch.getToken().getType() == HiveParser.TOK_DIR - && ch.getChildCount() > 0 && ch.getChild(0) instanceof ASTNode) { - ch = (ASTNode)ch.getChild(0); + ASTNode ch = (ASTNode) 
ast.getChild(0); + if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 + && ch.getChild(0) instanceof ASTNode) { + ch = (ASTNode) ch.getChild(0); isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE; + } else { + if (ast.getToken().getType() == HiveParser.TOK_DESTINATION + && ast.getChild(0).getType() == HiveParser.TOK_TAB) { + String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), + SessionState.get().getCurrentDatabase()); + qbp.getInsertOverwriteTables().put(fullTableName, ast); + } } } @@ -9260,13 +9270,37 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, qb.getParseInfo().setOuterQueryLimit(limit.intValue()); } if (!SessionState.get().getHiveOperation().equals(HiveOperation.CREATEVIEW)) { - curr = genFileSinkPlan(dest, qb, curr); + Operator op = genFileSinkPlan(dest, qb, curr); + // the following code is used to collect column stats when + // hive.stats.autogather=true + // and it is an insert overwrite or insert into table + if (op instanceof FileSinkOperator) { + FileSinkOperator fsOp = (FileSinkOperator) op; + String tableName = fsOp.getConf().getTableInfo().getTableName(); + if (tableName != null + && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) + && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) + && (qb.getParseInfo().getInsertOverwriteTables().containsKey(tableName) || + qb.getParseInfo().getInsertIntoTables().containsKey(tableName))) { + ColumnStatsAutoGatherContext columnStatsAutoGatherContext = null; + if (qb.getParseInfo().getInsertOverwriteTables().containsKey(tableName)) { + columnStatsAutoGatherContext = new ColumnStatsAutoGatherContext(this, conf, curr, + tableName, qb.getParseInfo().getInsertOverwriteTables().get(tableName), false); + } else { + columnStatsAutoGatherContext = new ColumnStatsAutoGatherContext(this, conf, curr, + tableName, qb.getParseInfo().getInsertIntoTables().get(tableName), true); + } + columnStatsAutoGatherContext.insertAnalyzePipeline(); + columnStatsAutoGatherContexts.add(columnStatsAutoGatherContext); + } + } + curr = op; } } return curr; } - + @SuppressWarnings("nls") private Operator genUnionPlan(String unionalias, String leftalias, Operator leftOp, String rightalias, Operator rightOp) @@ -10437,7 +10471,7 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce ParseContext pCtx = new ParseContext(conf, opToPartPruner, opToPartList, topOps, new HashSet(joinContext.keySet()), new HashSet(smbMapJoinContext.keySet()), - loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, + loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, @@ -12633,4 +12667,12 @@ private void warn(String msg) { String.format("Warning: %s", msg)); } + public List getLoadFileWork() { + return loadFileWork; + } + + public void setLoadFileWork(List loadFileWork) { + this.loadFileWork = loadFileWork; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 8e64a0b..174309c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -29,6 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -210,8 +211,33 @@ public void compile(final ParseContext pCtx, final List> tsks = new ArrayList<>(); + // find the move task that corresponds to the columnStatsAutoGatherContext + for (Task task : mvTask) { + if (task.getWork().getLoadTableWork().getTable().getTableName() + .equals(columnStatsAutoGatherContext.getTableName())) { + tsks.add(task); + break; + } + } + if (!columnStatsAutoGatherContext.isInsertInto()) { + genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), + columnStatsAutoGatherContext.getLoadFileWork(), tsks, outerQueryLimit, 0); + } else { + int numBitVector; + try { + numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); + } catch (Exception e) { + throw new SemanticException(e.getMessage()); + } + genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), + columnStatsAutoGatherContext.getLoadFileWork(), tsks, outerQueryLimit, numBitVector); + } + } } // For each task, set the key descriptor for the reducer @@ -309,8 +335,9 @@ public void compile(final ParseContext pCtx, final List loadTableWork, - List loadFileWork, List> rootTasks, int outerQueryLimit) { + protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, + List loadFileWork, List> rootTasks, + int outerQueryLimit, int numBitVector) { ColumnStatsTask cStatsTask = null; ColumnStatsWork cStatsWork = null; FetchWork fetch = null; @@ -328,7 +355,7 @@ protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, List colName; private List colType; @@ -36,12 +37,22 @@ public ColumnStatsDesc() { } + public ColumnStatsDesc(String tableName, List colName, List colType, + boolean isTblLevel) { + this.tableName = tableName; + this.colName = colName; + this.colType = colType; + this.isTblLevel = isTblLevel; + this.numBitVector = 0; + } + public ColumnStatsDesc(String tableName, List colName, - List colType, boolean isTblLevel) { + List colType, boolean isTblLevel, int numBitVector) { this.tableName = tableName; this.colName = colName; this.colType = colType; this.isTblLevel = isTblLevel; + this.numBitVector = numBitVector; } @Explain(displayName = "Table") @@ -79,4 +90,13 @@ public void setColName(List colName) { public void setColType(List colType) { this.colType = colType; } + + public int getNumBitVector() { + return numBitVector; + } + + public void setNumBitVector(int numBitVector) { + this.numBitVector = numBitVector; + } + } diff --git a/ql/src/test/queries/clientpositive/autoColumnStats.q b/ql/src/test/queries/clientpositive/autoColumnStats.q new file mode 100644 index 0000000..bb7252a --- /dev/null +++ b/ql/src/test/queries/clientpositive/autoColumnStats.q @@ -0,0 +1,192 @@ +set hive.stats.column.autogather=true; +set hive.stats.fetch.column.stats=true; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.auto.convert.join=true; +set hive.join.emit.interval=2; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.optimize.bucketingsorting=false; + +drop table src_multi1; + +create table src_multi1 like src; + +insert overwrite table src_multi1 select * from src; + 
+explain extended select * from src_multi1; + +describe formatted src_multi1; + +drop table a; +drop table b; +create table a like src; +create table b like src; + +from src +insert overwrite table a select * +insert overwrite table b select *; + +describe formatted a; +describe formatted b; + +drop table a; +drop table b; +create table a like src; +create table b like src; + +from src +insert overwrite table a select * +insert into table b select *; + +describe formatted a; +describe formatted b; + + +drop table src_multi2; + +create table src_multi2 like src; + +insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key; + +describe formatted src_multi2; + + +drop table nzhang_part14; + +create table if not exists nzhang_part14 (key string) + partitioned by (value string); + +insert overwrite table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T; + +explain select key from nzhang_part14; + + +drop table src5; + +create table src5 as select key, value from src limit 5; + +insert overwrite table nzhang_part14 partition(value) +select key, value from src5; + +explain select key from nzhang_part14; + + +create table alter5 ( col1 string ) partitioned by (dt string); + +alter table alter5 add partition (dt='a') location 'parta'; + +describe formatted alter5 partition (dt='a'); + +insert overwrite table alter5 partition (dt='a') select key from src ; + +describe formatted alter5 partition (dt='a'); + +explain select * from alter5 where dt='a'; + + +drop table src_stat_part; +create table src_stat_part(key string, value string) partitioned by (partitionId int); + +insert overwrite table src_stat_part partition (partitionId=1) +select * from src1 limit 5; + +describe formatted src_stat_part PARTITION(partitionId=1); + +insert overwrite table src_stat_part partition (partitionId=2) +select * from src1; + +describe formatted src_stat_part PARTITION(partitionId=2); + +drop table srcbucket_mapjoin; +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +drop table tab_part; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +drop table srcbucket_mapjoin_part; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from 
srcbucket_mapjoin_part; + +describe formatted tab_part partition (ds='2008-04-08'); + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +describe formatted tab partition (ds='2008-04-08'); + +drop table nzhang_part14; + +create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string); + +describe formatted nzhang_part14; + +insert overwrite table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T; + +desc formatted nzhang_part14 partition(ds='1', hr='3'); + + +INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10; + +desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12'); + + +drop table nzhang_part14; +create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string); + +INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10; + +desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12'); + +drop table a; +create table a (key string, value string) +partitioned by (ds string, hr string); + +drop table b; +create table b (key string, value string) +partitioned by (ds string, hr string); + +drop table c; +create table c (key string, value string) +partitioned by (ds string, hr string); + + +FROM srcpart +INSERT OVERWRITE TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT OVERWRITE TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT OVERWRITE TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0; + +explain select key from a; +explain select value from b; +explain select key from b; +explain select value from c; +explain select key from c; + diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_1.q b/ql/src/test/queries/clientpositive/autoColumnStats_1.q new file mode 100644 index 0000000..bb7252a --- /dev/null +++ b/ql/src/test/queries/clientpositive/autoColumnStats_1.q @@ -0,0 +1,192 @@ +set hive.stats.column.autogather=true; +set hive.stats.fetch.column.stats=true; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.auto.convert.join=true; +set hive.join.emit.interval=2; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.optimize.bucketingsorting=false; + +drop table src_multi1; + +create table src_multi1 like src; + +insert overwrite table src_multi1 select * from src; + +explain extended select * from src_multi1; + +describe formatted src_multi1; + +drop table a; +drop table b; +create table a like src; +create table b like src; + +from src +insert overwrite table a select * +insert overwrite table b select *; + +describe formatted a; 
+describe formatted b; + +drop table a; +drop table b; +create table a like src; +create table b like src; + +from src +insert overwrite table a select * +insert into table b select *; + +describe formatted a; +describe formatted b; + + +drop table src_multi2; + +create table src_multi2 like src; + +insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key; + +describe formatted src_multi2; + + +drop table nzhang_part14; + +create table if not exists nzhang_part14 (key string) + partitioned by (value string); + +insert overwrite table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T; + +explain select key from nzhang_part14; + + +drop table src5; + +create table src5 as select key, value from src limit 5; + +insert overwrite table nzhang_part14 partition(value) +select key, value from src5; + +explain select key from nzhang_part14; + + +create table alter5 ( col1 string ) partitioned by (dt string); + +alter table alter5 add partition (dt='a') location 'parta'; + +describe formatted alter5 partition (dt='a'); + +insert overwrite table alter5 partition (dt='a') select key from src ; + +describe formatted alter5 partition (dt='a'); + +explain select * from alter5 where dt='a'; + + +drop table src_stat_part; +create table src_stat_part(key string, value string) partitioned by (partitionId int); + +insert overwrite table src_stat_part partition (partitionId=1) +select * from src1 limit 5; + +describe formatted src_stat_part PARTITION(partitionId=1); + +insert overwrite table src_stat_part partition (partitionId=2) +select * from src1; + +describe formatted src_stat_part PARTITION(partitionId=2); + +drop table srcbucket_mapjoin; +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +drop table tab_part; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +drop table srcbucket_mapjoin_part; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +describe formatted tab_part partition (ds='2008-04-08'); + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition 
(ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +describe formatted tab partition (ds='2008-04-08'); + +drop table nzhang_part14; + +create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string); + +describe formatted nzhang_part14; + +insert overwrite table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T; + +desc formatted nzhang_part14 partition(ds='1', hr='3'); + + +INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10; + +desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12'); + + +drop table nzhang_part14; +create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string); + +INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10; + +desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12'); + +drop table a; +create table a (key string, value string) +partitioned by (ds string, hr string); + +drop table b; +create table b (key string, value string) +partitioned by (ds string, hr string); + +drop table c; +create table c (key string, value string) +partitioned by (ds string, hr string); + + +FROM srcpart +INSERT OVERWRITE TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT OVERWRITE TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT OVERWRITE TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0; + +explain select key from a; +explain select value from b; +explain select key from b; +explain select value from c; +explain select key from c; + diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_2.q b/ql/src/test/queries/clientpositive/autoColumnStats_2.q new file mode 100644 index 0000000..b8c35e2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/autoColumnStats_2.q @@ -0,0 +1,199 @@ +set hive.stats.column.autogather=true; +set hive.stats.fetch.column.stats=true; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.auto.convert.join=true; +set hive.join.emit.interval=2; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.optimize.bucketingsorting=false; + +drop table src_multi1; + +create table src_multi1 like src; + +insert into table src_multi1 select * from src; + +explain extended select * from src_multi1; + +describe formatted src_multi1; + +drop table a; +drop table b; +create table a like src; +create table b like src; + +from src +insert into table a select * +insert into table b select *; + +describe formatted a key; +describe formatted b key; + +from src +insert overwrite table a select * +insert into table b select *; + +describe formatted a; +describe formatted b; + +describe formatted b key; +describe formatted b value; + +insert into table b select NULL, NULL from src limit 10; + 
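The append of ten all-NULL rows just above, followed by the describe formatted b key / b value calls on the next lines, exercises merging of auto-gathered column stats on INSERT INTO (null count and distinct-value estimates should reflect the new rows) rather than recomputing them from the full table. A hedged sketch of the manual alternative, reusing table b from this test (illustrative only, not part of the patch):

-- with autogather off, keeping the column stats current after an append
-- would require re-scanning the whole table
analyze table b compute statistics for columns key, value;
describe formatted b key;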
+describe formatted b key; +describe formatted b value; + +insert into table b(value) select key+100000 from src limit 10; + +describe formatted b key; +describe formatted b value; + +drop table src_multi2; + +create table src_multi2 like src; + +insert into table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key; + +describe formatted src_multi2; + + +drop table nzhang_part14; + +create table if not exists nzhang_part14 (key string) + partitioned by (value string); + +insert into table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T; + +explain select key from nzhang_part14; + + +drop table src5; + +create table src5 as select key, value from src limit 5; + +insert into table nzhang_part14 partition(value) +select key, value from src5; + +explain select key from nzhang_part14; + + +create table alter5 ( col1 string ) partitioned by (dt string); + +alter table alter5 add partition (dt='a') location 'parta'; + +describe formatted alter5 partition (dt='a'); + +insert into table alter5 partition (dt='a') select key from src ; + +describe formatted alter5 partition (dt='a'); + +explain select * from alter5 where dt='a'; + + +drop table src_stat_part; +create table src_stat_part(key string, value string) partitioned by (partitionId int); + +insert into table src_stat_part partition (partitionId=1) +select * from src1 limit 5; + +describe formatted src_stat_part PARTITION(partitionId=1); + +insert into table src_stat_part partition (partitionId=2) +select * from src1; + +describe formatted src_stat_part PARTITION(partitionId=2); + +drop table srcbucket_mapjoin; +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +drop table tab_part; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +drop table srcbucket_mapjoin_part; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + +insert into table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +describe formatted tab_part partition (ds='2008-04-08'); + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert into table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +describe formatted tab partition 
(ds='2008-04-08'); + +drop table nzhang_part14; + +create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string); + +describe formatted nzhang_part14; + +insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T; + +desc formatted nzhang_part14 partition(ds='1', hr='3'); + + +INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10; + +desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12'); + + +drop table nzhang_part14; +create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string); + +INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10; + +desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12'); + +drop table a; +create table a (key string, value string) +partitioned by (ds string, hr string); + +drop table b; +create table b (key string, value string) +partitioned by (ds string, hr string); + +drop table c; +create table c (key string, value string) +partitioned by (ds string, hr string); + + +FROM srcpart +INSERT into TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT into TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT into TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0; + +explain select key from a; +explain select value from b; +explain select key from b; +explain select value from c; +explain select key from c; + diff --git a/ql/src/test/results/clientpositive/autoColumnStats.q.out b/ql/src/test/results/clientpositive/autoColumnStats.q.out new file mode 100644 index 0000000..47c72fb --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats.q.out @@ -0,0 +1,1393 @@ +PREHOOK: query: drop table src_multi1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: create table src_multi1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi1 +PREHOOK: query: insert overwrite table src_multi1 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: insert overwrite table src_multi1 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select * from src_multi1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from src_multi1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src_multi1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + 
TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src_multi1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: describe formatted src_multi1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi1 +POSTHOOK: query: describe formatted src_multi1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table a like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: from src +insert overwrite table a select * +insert overwrite table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert overwrite table a select * +insert overwrite table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + 
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table a like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: from src +insert overwrite table a select * +insert into table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert overwrite table a select * +insert into table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a +# col_name data_type comment + +key string 
default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table src_multi2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi2 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: create table src_multi2 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi2 +PREHOOK: query: insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_multi2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi2 +POSTHOOK: query: describe formatted src_multi2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi2 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE 
{\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 508 + rawDataSize 5400 + totalSize 5908 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: insert overwrite table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert overwrite table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@value= +POSTHOOK: Output: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: nzhang_part14 PARTITION(value= ).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=__HIVE_DEFAULT_PARTITION__).key EXPRESSION [] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: nzhang_part14 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: drop table src5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src5 as select key, value from src limit 5 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src5 +POSTHOOK: query: create table src5 as select key, value from src limit 5 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src5 +PREHOOK: query: insert overwrite table nzhang_part14 partition(value) +select key, value from src5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src5 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert overwrite 
table nzhang_part14 partition(value) +select key, value from src5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src5 +POSTHOOK: Output: default@nzhang_part14@value=val_165 +POSTHOOK: Output: default@nzhang_part14@value=val_238 +POSTHOOK: Output: default@nzhang_part14@value=val_27 +POSTHOOK: Output: default@nzhang_part14@value=val_311 +POSTHOOK: Output: default@nzhang_part14@value=val_86 +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_165).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_238).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_27).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_311).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_86).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: nzhang_part14 + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alter5 +POSTHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alter5 +PREHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@alter5 +POSTHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@alter5 +POSTHOOK: Output: default@alter5@dt=a +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] +Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert overwrite table alter5 partition (dt='a') select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter5@dt=a +POSTHOOK: query: insert overwrite table alter5 partition (dt='a') select key from src 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter5@dt=a +POSTHOOK: Lineage: alter5 PARTITION(dt=a).col1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] +Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col1\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 1406 + totalSize 1906 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select * from alter5 where dt='a' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from alter5 where dt='a' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alter5 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: col1 (type: string), 'a' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 86000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: drop table src_stat_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_stat_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat_part +POSTHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_part +PREHOOK: query: insert overwrite table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: query: insert overwrite table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +partitionid int + +# 
Detailed Partition Information +Partition Value: [1] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 5 + rawDataSize 38 + totalSize 43 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert overwrite table src_stat_part partition (partitionId=2) +select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: query: insert overwrite table src_stat_part partition (partitionId=2) +select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +partitionid int + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 25 + rawDataSize 191 + totalSize 216 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table tab_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tab_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part 
+POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab_part +POSTHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab_part +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab +POSTHOOK: query: describe formatted tab partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 242 + rawDataSize 2566 + totalSize 2808 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: describe formatted nzhang_part14 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: describe formatted nzhang_part14 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert overwrite table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as 
hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert overwrite table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2 +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3 +POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION [] +PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [1, 3] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 6 + totalSize 8 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: drop table c +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table c +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@c +POSTHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@c +PREHOOK: query: FROM srcpart +INSERT OVERWRITE TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT OVERWRITE TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, 
value, hr WHERE ds is not null and hr>11 +INSERT OVERWRITE TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@a@ds=2010-03-11 +PREHOOK: Output: default@b@ds=2010-04-11 +PREHOOK: Output: default@c@ds=2010-05-11 +POSTHOOK: query: FROM srcpart +INSERT OVERWRITE TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT OVERWRITE TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT OVERWRITE TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=11 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=12 +POSTHOOK: Output: default@b@ds=2010-04-11/hr=12 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=11 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=12 +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select key from a +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select value from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select key from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select value from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: c + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select key from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: c + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + diff --git a/ql/src/test/results/clientpositive/autoColumnStats_1.q.out b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out new file mode 100644 index 0000000..47c72fb --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out @@ -0,0 +1,1393 @@ +PREHOOK: query: drop table src_multi1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: create table src_multi1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi1 +PREHOOK: query: insert overwrite table src_multi1 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: insert overwrite table src_multi1 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select * from src_multi1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from src_multi1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src_multi1 + TOK_INSERT + TOK_DESTINATION + 
TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src_multi1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: describe formatted src_multi1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi1 +POSTHOOK: query: describe formatted src_multi1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table a like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: from src +insert overwrite table a select * +insert overwrite table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert overwrite table a select * +insert overwrite table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### 
+Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table a like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: from src +insert overwrite table a select * +insert into table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert overwrite table a select * +insert into table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a 
+# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table src_multi2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi2 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: create table src_multi2 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi2 +PREHOOK: query: insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_multi2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi2 +POSTHOOK: query: describe formatted src_multi2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi2 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table 
Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 508 + rawDataSize 5400 + totalSize 5908 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: insert overwrite table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert overwrite table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@value= +POSTHOOK: Output: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: nzhang_part14 PARTITION(value= ).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=__HIVE_DEFAULT_PARTITION__).key EXPRESSION [] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: nzhang_part14 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: drop table src5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src5 as select key, value from src limit 5 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src5 +POSTHOOK: query: create table src5 as select key, value from src limit 5 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src5 +PREHOOK: query: insert overwrite table nzhang_part14 partition(value) +select key, value from src5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src5 +PREHOOK: Output: default@nzhang_part14 
+POSTHOOK: query: insert overwrite table nzhang_part14 partition(value) +select key, value from src5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src5 +POSTHOOK: Output: default@nzhang_part14@value=val_165 +POSTHOOK: Output: default@nzhang_part14@value=val_238 +POSTHOOK: Output: default@nzhang_part14@value=val_27 +POSTHOOK: Output: default@nzhang_part14@value=val_311 +POSTHOOK: Output: default@nzhang_part14@value=val_86 +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_165).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_238).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_27).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_311).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_86).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: nzhang_part14 + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alter5 +POSTHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alter5 +PREHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@alter5 +POSTHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@alter5 +POSTHOOK: Output: default@alter5@dt=a +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] +Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert overwrite table alter5 partition (dt='a') select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter5@dt=a +POSTHOOK: query: insert overwrite table alter5 
partition (dt='a') select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter5@dt=a +POSTHOOK: Lineage: alter5 PARTITION(dt=a).col1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] +Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col1\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 1406 + totalSize 1906 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select * from alter5 where dt='a' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from alter5 where dt='a' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alter5 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: col1 (type: string), 'a' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 86000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: drop table src_stat_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_stat_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat_part +POSTHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_part +PREHOOK: query: insert overwrite table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: query: insert overwrite table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name 
data_type comment + +partitionid int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 5 + rawDataSize 38 + totalSize 43 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert overwrite table src_stat_part partition (partitionId=2) +select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: query: insert overwrite table src_stat_part partition (partitionId=2) +select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +partitionid int + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 25 + rawDataSize 191 + totalSize 216 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table tab_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tab_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@tab_part +POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here 
#### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab_part +POSTHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab_part +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from 
srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab +POSTHOOK: query: describe formatted tab partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 242 + rawDataSize 2566 + totalSize 2808 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: describe formatted nzhang_part14 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: describe formatted nzhang_part14 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert overwrite table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as 
key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert overwrite table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2 +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3 +POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION [] +PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [1, 3] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 6 + totalSize 8 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT OVERWRITE TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: drop table c +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table c +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@c +POSTHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@c +PREHOOK: query: FROM srcpart +INSERT OVERWRITE TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT OVERWRITE 
TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT OVERWRITE TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@a@ds=2010-03-11 +PREHOOK: Output: default@b@ds=2010-04-11 +PREHOOK: Output: default@c@ds=2010-05-11 +POSTHOOK: query: FROM srcpart +INSERT OVERWRITE TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT OVERWRITE TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT OVERWRITE TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=11 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=12 +POSTHOOK: Output: default@b@ds=2010-04-11/hr=12 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=11 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=12 +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select key from a +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select value from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 
is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select key from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select value from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: c + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select key from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: c + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + diff --git a/ql/src/test/results/clientpositive/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out new file mode 100644 index 0000000..7f7eb03 --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out @@ -0,0 +1,1383 @@ +PREHOOK: query: drop table src_multi1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: create table src_multi1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi1 +PREHOOK: query: insert into table src_multi1 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: insert into table src_multi1 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select * from src_multi1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from src_multi1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + 
src_multi1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src_multi1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: describe formatted src_multi1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi1 +POSTHOOK: query: describe formatted src_multi1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table a like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: from src +insert into table a select * +insert into table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert into table a select * +insert into table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: describe 
formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: from src +insert overwrite table a select * +insert into table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert overwrite table a select * +insert into table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: describe formatted b value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b 
+POSTHOOK: query: describe formatted b value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 214 6.812 7 from deserializer +PREHOOK: query: insert into table b select NULL, NULL from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b select NULL, NULL from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key SIMPLE [] +POSTHOOK: Lineage: b.value SIMPLE [] +PREHOOK: query: describe formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 10 205 2.812 3 from deserializer +PREHOOK: query: describe formatted b value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 10 214 6.812 7 from deserializer +PREHOOK: query: insert into table b(value) select key+100000 from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b(value) select key+100000 from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key SIMPLE [] +POSTHOOK: Lineage: b.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: describe formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 20 205 2.812 3 from deserializer +PREHOOK: query: describe formatted b value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 10 214 8.0 8 from deserializer +PREHOOK: query: drop table src_multi2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi2 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: create table src_multi2 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi2 +PREHOOK: query: insert into table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: insert into table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Lineage: src_multi2.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), (src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_multi2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi2 +POSTHOOK: query: describe formatted src_multi2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi2 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 508 + rawDataSize 5400 + totalSize 5908 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@value= +POSTHOOK: Output: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: nzhang_part14 PARTITION(value= ).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=__HIVE_DEFAULT_PARTITION__).key EXPRESSION [] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: nzhang_part14 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: drop table 
src5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src5 as select key, value from src limit 5 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src5 +POSTHOOK: query: create table src5 as select key, value from src limit 5 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src5 +PREHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from src5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src5 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from src5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src5 +POSTHOOK: Output: default@nzhang_part14@value=val_165 +POSTHOOK: Output: default@nzhang_part14@value=val_238 +POSTHOOK: Output: default@nzhang_part14@value=val_27 +POSTHOOK: Output: default@nzhang_part14@value=val_311 +POSTHOOK: Output: default@nzhang_part14@value=val_86 +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_165).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_238).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_27).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_311).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_86).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: nzhang_part14 + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alter5 +POSTHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alter5 +PREHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@alter5 +POSTHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@alter5 +POSTHOOK: Output: default@alter5@dt=a +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] 
+Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table alter5 partition (dt='a') select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter5@dt=a +POSTHOOK: query: insert into table alter5 partition (dt='a') select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter5@dt=a +POSTHOOK: Lineage: alter5 PARTITION(dt=a).col1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] +Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col1\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 1406 + totalSize 1906 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select * from alter5 where dt='a' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from alter5 where dt='a' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alter5 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: col1 (type: string), 'a' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 86000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: drop table src_stat_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_stat_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat_part +POSTHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_part +PREHOOK: query: insert into table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: query: insert into table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: 
default@src_stat_part@partitionid=1 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +partitionid int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 5 + rawDataSize 38 + totalSize 43 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table src_stat_part partition (partitionId=2) +select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: query: insert into table src_stat_part partition (partitionId=2) +select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +partitionid int + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 25 + rawDataSize 191 + totalSize 216 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table tab_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tab_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part +POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load 
data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: insert into table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: query: insert into table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab_part +POSTHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab_part +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: insert into table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: query: insert into table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab +POSTHOOK: query: describe formatted tab partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 242 + rawDataSize 2566 + totalSize 2808 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: describe formatted nzhang_part14 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: describe formatted nzhang_part14 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: 
MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2 +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3 +POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION [] +PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [1, 3] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 6 + totalSize 8 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: 
default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: 
default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@b +PREHOOK: query: drop table c +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table c +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@c +POSTHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@c +PREHOOK: query: FROM srcpart +INSERT into TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT into TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT into TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@a@ds=2010-03-11 +PREHOOK: Output: default@b@ds=2010-04-11 +PREHOOK: Output: default@c@ds=2010-05-11 +POSTHOOK: query: FROM srcpart +INSERT into TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT into TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT into TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=11 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=12 +POSTHOOK: Output: default@b@ds=2010-04-11/hr=12 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=11 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=12 +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select key from a +PREHOOK: type: QUERY +POSTHOOK: query: explain 
select key from a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select value from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select key from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select value from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: c + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select key from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: c + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + diff --git a/ql/src/test/results/clientpositive/tez/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/tez/autoColumnStats_2.q.out new file mode 100644 index 0000000..a82f694 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/autoColumnStats_2.q.out @@ -0,0 +1,1325 @@ +PREHOOK: query: drop table src_multi1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: create table src_multi1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi1 +PREHOOK: query: insert into table src_multi1 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: 
Output: default@src_multi1 +POSTHOOK: query: insert into table src_multi1 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select * from src_multi1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from src_multi1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src_multi1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src_multi1 + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: describe formatted src_multi1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi1 +POSTHOOK: query: describe formatted src_multi1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table a like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: from src +insert into table a select * +insert into table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert into table a select * +insert into table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: describe formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: from src +insert overwrite table a select * +insert into table b select * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a +PREHOOK: Output: default@b +POSTHOOK: query: from src +insert overwrite table a select * +insert into table b select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +POSTHOOK: Output: default@b +POSTHOOK: Lineage: a.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@a +POSTHOOK: query: describe formatted a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@a +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + 
serialization.format 1 +PREHOOK: query: describe formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: describe formatted b value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 214 6.812 7 from deserializer +PREHOOK: query: insert into table b select NULL, NULL from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b select NULL, NULL from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key SIMPLE [] +POSTHOOK: Lineage: b.value SIMPLE [] +PREHOOK: query: describe formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 10 205 2.812 3 from deserializer +PREHOOK: query: describe formatted b value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 10 214 6.812 7 from deserializer +PREHOOK: query: insert into table b(value) select key+100000 from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b(value) select key+100000 from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key SIMPLE [] +POSTHOOK: Lineage: b.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: describe formatted b key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 20 205 2.812 3 from deserializer +PREHOOK: query: describe formatted b value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@b +POSTHOOK: query: describe formatted b value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@b +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 10 266 8.0 8 from deserializer +PREHOOK: query: drop table src_multi2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi2 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: create table src_multi2 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi2 +PREHOOK: query: insert into table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on 
subq.key=src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: insert into table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_multi2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi2 +POSTHOOK: query: describe formatted src_multi2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi2 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 508 + rawDataSize 5400 + totalSize 5908 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string) + partitioned by (value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from ( + select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a + union all + select * from (select 'k2' as key, '' as value from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@value= +POSTHOOK: Output: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: nzhang_part14 PARTITION(value= ).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=__HIVE_DEFAULT_PARTITION__).key EXPRESSION [] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: 
QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] + Output:["key"] + +PREHOOK: query: drop table src5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src5 as select key, value from src limit 5 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src5 +POSTHOOK: query: create table src5 as select key, value from src limit 5 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src5 +PREHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from src5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src5 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(value) +select key, value from src5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src5 +POSTHOOK: Output: default@nzhang_part14@value=val_165 +POSTHOOK: Output: default@nzhang_part14@value=val_238 +POSTHOOK: Output: default@nzhang_part14@value=val_27 +POSTHOOK: Output: default@nzhang_part14@value=val_311 +POSTHOOK: Output: default@nzhang_part14@value=val_86 +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_165).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_238).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_27).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_311).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(value=val_86).key SIMPLE [(src5)src5.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: explain select key from nzhang_part14 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from nzhang_part14 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] + Output:["key"] + +PREHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alter5 +POSTHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alter5 +PREHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@alter5 +POSTHOOK: query: alter table alter5 add partition (dt='a') location 'parta' +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@alter5 +POSTHOOK: Output: default@alter5@dt=a +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] +Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table alter5 partition (dt='a') select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter5@dt=a +POSTHOOK: query: insert into table alter5 partition (dt='a') select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter5@dt=a +POSTHOOK: Lineage: alter5 PARTITION(dt=a).col1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: describe formatted alter5 partition (dt='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter5 +POSTHOOK: query: describe formatted alter5 partition (dt='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter5 +# col_name data_type comment + +col1 string + +# Partition Information +# col_name data_type comment + +dt string + +# Detailed Partition Information +Partition Value: [a] +Database: default +Table: alter5 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col1\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 1406 + totalSize 1906 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select * from alter5 where dt='a' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from alter5 where dt='a' +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_2] + Output:["_col0","_col1"] + TableScan [TS_0] + Output:["col1"] + +PREHOOK: query: drop table src_stat_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_stat_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat_part +POSTHOOK: query: create table src_stat_part(key string, value string) partitioned by (partitionId int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_part +PREHOOK: query: insert into table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: query: insert into table src_stat_part partition (partitionId=1) +select * from src1 limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=1).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +partitionid int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 5 + rawDataSize 38 + totalSize 43 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table src_stat_part partition (partitionId=2) +select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: query: insert into table src_stat_part partition (partitionId=2) +select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part +POSTHOOK: query: describe formatted src_stat_part PARTITION(partitionId=2) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part +# col_name data_type comment + +key string +value string + +# Partition Information +# 
col_name data_type comment + +partitionid int + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: src_stat_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 25 + rawDataSize 191 + totalSize 216 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: drop table tab_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tab_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part +POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part +PREHOOK: query: drop table srcbucket_mapjoin_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table srcbucket_mapjoin_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: query: load 
data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: insert into table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: query: insert into table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab_part +POSTHOOK: query: describe formatted tab_part partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab_part +# col_name 
data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: insert into table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: query: insert into table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe formatted tab partition (ds='2008-04-08') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tab +POSTHOOK: query: describe formatted tab partition (ds='2008-04-08') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tab +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2008-04-08] +Database: default +Table: tab +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 242 + rawDataSize 2566 + totalSize 2808 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1, nullOrder:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: 
Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: describe formatted nzhang_part14 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: describe formatted nzhang_part14 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2 +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3 +POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION [] +PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [1, 3] +Database: default +Table: 
nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 6 + totalSize 8 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: drop 
table nzhang_part14 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@nzhang_part14@ds=2010-03-03 +POSTHOOK: query: INSERT into TABLE nzhang_part14 PARTITION (ds='2010-03-03', hr) +SELECT key, value, hr FROM srcpart WHERE ds is not null and hr>10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=11 +POSTHOOK: Output: default@nzhang_part14@ds=2010-03-03/hr=12 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2010-03-03,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 PARTITION(ds='2010-03-03', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2010-03-03, 12] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1000 + rawDataSize 10624 + totalSize 11624 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@a +POSTHOOK: Output: default@a +PREHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: drop table c +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table c +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@c +POSTHOOK: query: create table c (key string, value string) +partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@c +PREHOOK: query: FROM srcpart +INSERT into TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT into TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT into TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@a@ds=2010-03-11 +PREHOOK: Output: default@b@ds=2010-04-11 +PREHOOK: Output: default@c@ds=2010-05-11 +POSTHOOK: query: FROM srcpart +INSERT into TABLE a PARTITION (ds='2010-03-11', hr) SELECT key, value, hr WHERE ds is not null and hr>10 +INSERT into TABLE b PARTITION (ds='2010-04-11', hr) SELECT key, value, hr WHERE ds is not null and hr>11 +INSERT into TABLE c PARTITION (ds='2010-05-11', hr) SELECT key, value, hr WHERE hr>0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=11 +POSTHOOK: Output: default@a@ds=2010-03-11/hr=12 +POSTHOOK: Output: default@b@ds=2010-04-11/hr=12 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=11 +POSTHOOK: Output: default@c@ds=2010-05-11/hr=12 +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: a PARTITION(ds=2010-03-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a 
PARTITION(ds=2010-03-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b PARTITION(ds=2010-04-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: c PARTITION(ds=2010-05-11,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select key from a +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from a +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] + Output:["key"] + +PREHOOK: query: explain select value from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from b +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] + Output:["value"] + +PREHOOK: query: explain select key from b +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from b +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] + Output:["key"] + +PREHOOK: query: explain select value from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select value from c +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] + Output:["value"] + +PREHOOK: query: explain select key from c +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from c +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] + Output:["key"] +
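The golden outputs above all exercise the same mechanism: with automatic column statistics gathering enabled (the hive.stats.column.autogather flag), every INSERT also updates the target's column statistics, so describe formatted <table> <column> reflects the new data without an explicit ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS. A minimal HiveQL sketch of that scenario, assuming the standard src test table is loaded (illustrative only, not part of the golden files):

    SET hive.stats.column.autogather=true;    -- column stats gathered as a side effect of inserts

    CREATE TABLE b LIKE src;

    INSERT INTO TABLE b SELECT * FROM src;                    -- seeds rows; stats for key/value recorded
    INSERT INTO TABLE b SELECT NULL, NULL FROM src LIMIT 10;  -- appends 10 NULL rows

    -- num_nulls for b.key should now read 10 (and 20 after a further partial-column insert),
    -- matching the progression shown in the outputs above.
    DESCRIBE FORMATTED b key;

The same behaviour is verified for INSERT OVERWRITE, multi-table inserts (FROM src INSERT INTO a ... INSERT INTO b ...), and dynamic-partition inserts, where each written partition carries its own COLUMN_STATS_ACCURATE entry in the describe formatted output.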