diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index cbc3cd2..b1e37e7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -413,6 +413,13 @@
       + "for partition columns"),
   STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"),
+
+  STATSAGGREGATOR_SOURCETASK_NULL(30014, "SourceTask for StatsAggregator should not be null"),
+
+  STATSAGGREGATOR_CONNECTION_ERROR(30015, "StatsAggregator cannot be connected to. " +
+      "There was an error while connecting to the StatsAggregator, and retrying " +
+      "might help. If you don't want the query to fail because accurate statistics " +
+      "could not be collected, set hive.stats.reliable=false"),
   ;

   private int errorCode;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index e7453c7..533a241 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -977,7 +977,8 @@ private void publishStats() throws HiveException {
       // for non-partitioned/static partitioned table, the key for temp storage is
       // common key prefix + static partition spec + taskID
       String keyPrefix = Utilities.getHashedStatsPrefix(
-          conf.getStatsAggPrefix() + spSpec, conf.getMaxStatsKeyPrefixLength());
+          conf.getStatsAggPrefix() + spSpec,
+          conf.getMaxStatsKeyPrefixLength(), taskID.length());
       key = keyPrefix + taskID;
     } else {
       // for partitioned table, the key is
@@ -1060,7 +1061,7 @@ private String createKeyForStatsPublisher(String taskID, String spSpec, String f
     }
     String keyPrefix = Utilities.getHashedStatsPrefix(
         conf.getStatsAggPrefix() + spSpec + newFspKey,
-        conf.getMaxStatsKeyPrefixLength());
+        conf.getMaxStatsKeyPrefixLength(), storedAsDirPostFix.length() - taskID.length());
     key = keyPrefix + storedAsDirPostFix + taskID;
     return key;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
index 142af10..4044b7d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
@@ -34,6 +34,7 @@
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -174,25 +175,12 @@ private int aggregateStats() {

       // Stats setup:
       Warehouse wh = new Warehouse(conf);
-      if (!this.getWork().getNoStatsAggregator()) {
-        String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
-        StatsFactory factory = StatsFactory.newFactory(statsImplementationClass, conf);
-        if (factory != null && work.isNoScanAnalyzeCommand()){
-          // initialize stats publishing table for noscan which has only stats task
-          // the rest of MR task following stats task initializes it in ExecDriver.java
-          StatsPublisher statsPublisher = factory.getStatsPublisher();
-          if (!statsPublisher.init(conf)) { // creating stats table if not exists
-            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
-              throw
-                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
-            }
-          }
-        }
-        if (factory != null) {
-          statsAggregator = factory.getStatsAggregator();
-          // manufacture a StatsAggregator
-          if (!statsAggregator.connect(conf, getWork().getSourceTask())) {
-            throw new HiveException("StatsAggregator connect failed " + statsImplementationClass);
+      if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
+        try {
+          statsAggregator = createStatsAggregator(conf);
+        } catch (HiveException e) {
+          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
+            throw e;
           }
         }
       }
@@ -212,8 +200,7 @@ private int aggregateStats() {
       List<Partition> partitions = getPartitionsList();
       boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
-      int maxPrefixLength = HiveConf.getIntVar(conf,
-          HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH);
+      int maxPrefixLength = StatsFactory.getMaxPrefixLength(conf);

       if (partitions == null) {
         // non-partitioned tables:
@@ -222,7 +209,7 @@ private int aggregateStats() {
         }
         // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
         if (statsAggregator != null) {
-          String aggKey = Utilities.getHashedStatsPrefix(work.getAggKey(), maxPrefixLength);
+          String aggKey = Utilities.getHashedStatsPrefix(work.getAggKey(), maxPrefixLength, 0);
           updateStats(StatsSetupConst.statsRequireCompute, tblStats, statsAggregator, parameters,
               aggKey, atomic);
           statsAggregator.cleanUp(aggKey);
@@ -281,7 +268,7 @@ else if (work.isClearAggregatorStats()) {
           // In that case of a partition, the key for stats temporary store is
           // "rootDir/[dynamic_partition_specs/]%"
           String partitionID = Utilities.getHashedStatsPrefix(
-              work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength);
+              work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength, 0);

           LOG.info("Stats aggregator : " + partitionID);
@@ -355,6 +342,30 @@ else if (work.isClearAggregatorStats()) {
     return ret;
   }

+  private StatsAggregator createStatsAggregator(HiveConf conf) throws HiveException {
+    String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
+    StatsFactory factory = StatsFactory.newFactory(statsImplementationClass, conf);
+    if (factory == null) {
+      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
+    }
+    // initialize stats publishing table for noscan which has only stats task
+    // the rest of MR task following stats task initializes it in ExecDriver.java
+    StatsPublisher statsPublisher = factory.getStatsPublisher();
+    if (!statsPublisher.init(conf)) { // creating stats table if not exists
+      throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
+    }
+    MapRedTask sourceTask = getWork().getSourceTask();
+    if (sourceTask == null) {
+      throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg());
+    }
+    // manufacture a StatsAggregator
+    StatsAggregator statsAggregator = factory.getStatsAggregator();
+    if (!statsAggregator.connect(conf, getWork().getSourceTask())) {
+      throw new HiveException(ErrorMsg.STATSAGGREGATOR_CONNECTION_ERROR.getErrorCodedMsg());
+    }
+    return statsAggregator;
+  }
+
   private boolean existStats(Map<String, String> parameters) {
     return parameters.containsKey(StatsSetupConst.ROW_COUNT)
         || parameters.containsKey(StatsSetupConst.NUM_FILES)
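Note (illustration only, not part of the patch): the refactoring above funnels every aggregator setup failure (factory not obtained, publisher init failure, missing source task, failed connect) into a HiveException thrown by createStatsAggregator(), and the single call site decides whether that failure is fatal. A minimal sketch of the call-site pattern, using the identifiers visible in StatsTask; the explicit null assignment is an assumption of this sketch:

    // hive.stats.reliable gates whether a stats-setup failure aborts the query
    try {
      statsAggregator = createStatsAggregator(conf);   // throws errors such as 30014/30015 above
    } catch (HiveException e) {
      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
        throw e;                // accurate statistics are mandatory: fail the query
      }
      statsAggregator = null;   // best-effort mode: continue without aggregation
    }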
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 8d895f4..6061768 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -293,13 +293,15 @@ private void publishStats() throws HiveException {
       // In case of a non-partitioned table, the key for temp storage is just
       // "tableName + taskID"
       String keyPrefix = Utilities.getHashedStatsPrefix(
-          conf.getStatsAggPrefix(), conf.getMaxStatsKeyPrefixLength());
+          conf.getStatsAggPrefix(),
+          conf.getMaxStatsKeyPrefixLength(), taskID.length());
       key = keyPrefix + taskID;
     } else {
       // In case of a partition, the key for temp storage is
       // "tableName + partitionSpecs + taskID"
       String keyPrefix = Utilities.getHashedStatsPrefix(
-          conf.getStatsAggPrefix() + pspecs, conf.getMaxStatsKeyPrefixLength());
+          conf.getStatsAggPrefix() + pspecs,
+          conf.getMaxStatsKeyPrefixLength(), taskID.length());
       key = keyPrefix + taskID;
     }
     for(String statType : stats.get(pspecs).getStoredStats()) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 91a9e6c..650ed9f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2284,9 +2284,10 @@ public static StatsPublisher getStatsPublisher(JobConf jc) {
    * @param maxPrefixLength
    * @return
    */
-  public static String getHashedStatsPrefix(String statsPrefix, int maxPrefixLength) {
+  public static String getHashedStatsPrefix(String statsPrefix,
+      int maxPrefixLength, int postfixLength) {
     String ret = appendPathSeparator(statsPrefix);
-    if (maxPrefixLength >= 0 && statsPrefix.length() > maxPrefixLength) {
+    if (maxPrefixLength >= 0 && statsPrefix.length() > maxPrefixLength - postfixLength) {
       try {
         MessageDigest digester = MessageDigest.getInstance("MD5");
         digester.update(ret.getBytes());
@@ -2295,6 +2296,8 @@ public static String getHashedStatsPrefix(String statsPrefix, int maxPrefixLengt
         throw new RuntimeException(e);
       }
     }
+    // todo: this might return possibly longer prefix than maxPrefixLength,
+    // which would make stats invalid (especially for 'counter' type)
     return ret;
   }
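Note (illustration only, not part of the patch): the extra postfixLength argument exists because callers append a postfix (typically the task ID) after the returned prefix, so the length check has to reserve room for it. A self-contained sketch of that idea; the hex encoding of the digest and the sample values are assumptions of this sketch, not the actual Hive implementation:

    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;

    public class StatsKeyLengthSketch {
      // Same intent as Utilities.getHashedStatsPrefix(prefix, maxPrefixLength, postfixLength):
      // if the prefix plus the postfix appended later cannot fit, replace the prefix by a
      // fixed-width digest so the final key stays within the limit.
      static String hashedPrefix(String prefix, int maxPrefixLength, int postfixLength) {
        String ret = prefix.endsWith("/") ? prefix : prefix + "/";
        if (maxPrefixLength >= 0 && prefix.length() > maxPrefixLength - postfixLength) {
          try {
            MessageDigest md5 = MessageDigest.getInstance("MD5");
            StringBuilder hex = new StringBuilder();
            for (byte b : md5.digest(ret.getBytes())) {
              hex.append(String.format("%02x", b));   // 32 hex chars total
            }
            ret = hex.append('/').toString();
          } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
          }
        }
        return ret;
      }

      public static void main(String[] args) {
        String taskID = "000001_0";
        String prefix = "default.some_table/ds=2013-09-01/hr=23/some/fairly/long/location/path";
        // The old two-argument check ignored the task ID appended afterwards, so
        // prefix + taskID could still exceed the limit (e.g. a 128-char counter group name).
        String key = hashedPrefix(prefix, 128, taskID.length()) + taskID;
        System.out.println(key + " (" + key.length() + " chars)");
      }
    }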
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
index 7e701f4..e319fe4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
@@ -26,13 +26,13 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
 import org.apache.hadoop.hive.shims.CombineHiveKey;
 import org.apache.hadoop.mapred.JobConf;
@@ -142,7 +142,7 @@ private void publishStats() throws HiveException {
     // construct key used to store stats in intermediate db
     String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(jc));
     String keyPrefix = Utilities.getHashedStatsPrefix(
-        statsAggKeyPrefix, HiveConf.getIntVar(jc, ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
+        statsAggKeyPrefix, StatsFactory.getMaxPrefixLength(jc), taskID.length());
     String key = keyPrefix + taskID;

     // construct statistics to be stored
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
index cca8481..d866cde 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
@@ -65,6 +65,7 @@
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.StatsWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.InputFormat;

@@ -247,8 +248,7 @@ private void addStatsTask(FileSinkOperator nd, MoveTask mvTask,
       mrWork.getReduceWork().setGatheringStats(true);
     }
     nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
-    nd.getConf().setMaxStatsKeyPrefixLength(
-        hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
+    nd.getConf().setMaxStatsKeyPrefixLength(StatsFactory.getMaxPrefixLength(hconf));
     // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

     // subscribe feeds from the MoveTask so that MoveTask can forward the list
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
index 0268f98..8d3dc56 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -74,7 +75,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx,

     // create a dummy MapReduce task
     MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
-    Task currTask = TaskFactory.get(currWork, parseCtx.getConf());
+    MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork, parseCtx.getConf());
     Operator currTopOp = op;
     ctx.setCurrTask(currTask);
     ctx.setCurrTopOp(currTopOp);
@@ -95,6 +96,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx,
         StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
         statsWork.setAggKey(op.getConf().getStatsAggPrefix());
+        statsWork.setSourceTask(currTask);
         statsWork.setStatsReliable(
             parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
         Task statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index e9d9ee7..c2a46a4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -154,6 +154,7 @@
 import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
+import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
@@ -8481,8 +8482,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
     } else {
       tsDesc.setGatherStats(true);
       tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
-      tsDesc.setMaxStatsKeyPrefixLength(
-          conf.getIntVar(HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
+      tsDesc.setMaxStatsKeyPrefixLength(StatsFactory.getMaxPrefixLength(conf));

       // append additional virtual columns for storing statistics
       Iterator vcs = VirtualColumn.getStatsRegistry(conf).iterator();
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java
index 2cc2519..fa1dcc9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java
@@ -60,7 +60,8 @@ public String aggregateStats(String keyPrefix, String statType) {
     long value = 0;
     for (String groupName : counters.getGroupNames()) {
       if (groupName.startsWith(keyPrefix)) {
-        value += counters.getGroup(groupName).getCounter(statType);
+        long counter = counters.getGroup(groupName).getCounter(statType);
+        value += counter;
       }
     }
     return String.valueOf(value);
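Note (illustration only, not part of the patch): with hive.stats.dbclass=counter each task publishes its statistics as Hadoop counters whose group name is the full stats key (key prefix plus task ID), so aggregation is simply a sum over all counter groups that share the prefix, which is what the loop changed above does. A self-contained sketch of that idea; the group names and values are made up, and a real run reads the Counters object of the finished job rather than building one by hand:

    import org.apache.hadoop.mapred.Counters;

    public class CounterAggregationSketch {
      public static void main(String[] args) {
        Counters counters = new Counters();
        // pretend two map tasks published their stats under the same key prefix
        counters.incrCounter("default.dummy1/000000_0", "numRows", 300L);
        counters.incrCounter("default.dummy1/000000_0", "rawDataSize", 3200L);
        counters.incrCounter("default.dummy1/000001_0", "numRows", 200L);
        counters.incrCounter("default.dummy1/000001_0", "rawDataSize", 2112L);

        // same shape as CounterStatsAggregator.aggregateStats(keyPrefix, statType)
        String keyPrefix = "default.dummy1/";
        String statType = "numRows";
        long value = 0;
        for (String groupName : counters.getGroupNames()) {
          if (groupName.startsWith(keyPrefix)) {
            long counter = counters.getGroup(groupName).getCounter(statType);
            value += counter;
          }
        }
        System.out.println(statType + " = " + value);   // prints numRows = 500
      }
    }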
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java
index 8ae32f0..2fb880d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java
@@ -28,6 +28,9 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.util.ReflectionUtils;

+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH;
+
 /**
  * A factory of stats publisher and aggregator implementations of the
  * StatsPublisher and StatsAggregator interfaces.
@@ -40,8 +43,19 @@
   private Class aggregatorImplementation;
   private Configuration jobConf;

+  public static int getMaxPrefixLength(Configuration conf) {
+    int maxPrefixLength = HiveConf.getIntVar(conf, HIVE_STATS_KEY_PREFIX_MAX_LENGTH);
+    if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.counter.name())) {
+      // see org.apache.hadoop.mapred.Counter or org.apache.hadoop.mapreduce.MRJobConfig
+      int groupNameMax = conf.getInt("mapreduce.job.counters.group.name.max", 128);
+      maxPrefixLength = maxPrefixLength < 0 ? groupNameMax :
+          Math.min(maxPrefixLength, groupNameMax);
+    }
+    return maxPrefixLength;
+  }
+
   public static StatsFactory newFactory(Configuration conf) {
-    return newFactory(HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS), conf);
+    return newFactory(HiveConf.getVar(conf, HIVESTATSDBCLASS), conf);
   }

   /**
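Note (illustration only, not part of the patch): getMaxPrefixLength() matters because Hadoop limits counter group names to mapreduce.job.counters.group.name.max characters (128 by default) and truncates longer ones. If the published key exceeded that limit, the truncated group name stored by the job would no longer start with the untruncated prefix the aggregator searches for, and the aggregation would find nothing to sum; this is also what the new todo in Utilities.getHashedStatsPrefix warns about. A self-contained sketch of that failure mode, with a deliberately made-up, over-long key:

    public class CounterGroupNameLimitSketch {
      // default of mapreduce.job.counters.group.name.max
      static final int GROUP_NAME_MAX = 128;

      // what the framework ends up storing: the key, truncated to the group-name limit
      static String publishedGroupName(String key) {
        return key.length() <= GROUP_NAME_MAX ? key : key.substring(0, GROUP_NAME_MAX);
      }

      public static void main(String[] args) {
        StringBuilder sb = new StringBuilder("default.t/");
        for (int i = 0; i < 20; i++) {
          sb.append("part_col_").append(i).append("=some_value/");   // deep partition path
        }
        String keyPrefix = sb.toString();
        String key = keyPrefix + "000001_0";

        String stored = publishedGroupName(key);
        // without capping the prefix length (and hashing over-long prefixes), the match fails
        System.out.println("prefix length = " + keyPrefix.length());
        System.out.println("aggregator finds the group: " + stored.startsWith(keyPrefix));
      }
    }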
diff --git ql/src/test/queries/clientpositive/stats_counter.q ql/src/test/queries/clientpositive/stats_counter.q
index 20769e4..3c1f132 100644
--- ql/src/test/queries/clientpositive/stats_counter.q
+++ ql/src/test/queries/clientpositive/stats_counter.q
@@ -1,6 +1,16 @@
-set hive.stats.autogather=true;
 set hive.stats.dbclass=counter;
+set hive.stats.autogather=false;
+
+-- by analyze
+create table dummy1 as select * from src;
+
+analyze table dummy1 compute statistics;
+desc formatted dummy1;
+
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;

-create table dummy as select * from src;
+-- by autogather
+create table dummy2 as select * from src;

-desc formatted dummy;
+desc formatted dummy2;
diff --git ql/src/test/queries/clientpositive/stats_noscan_2.q ql/src/test/queries/clientpositive/stats_noscan_2.q
index a19d01b..b106b30 100644
--- ql/src/test/queries/clientpositive/stats_noscan_2.q
+++ ql/src/test/queries/clientpositive/stats_noscan_2.q
@@ -3,10 +3,10 @@
 -- 1 test table
 CREATE EXTERNAL TABLE anaylyze_external (a INT) LOCATION '${system:hive.root}/data/files/ext_test';
 SELECT * FROM anaylyze_external;
-analyze table anaylyze_external compute statistics;
-describe formatted anaylyze_external;
 analyze table anaylyze_external compute statistics noscan;
 describe formatted anaylyze_external;
+analyze table anaylyze_external compute statistics;
+describe formatted anaylyze_external;
 drop table anaylyze_external;

 -- 2 test partition
@@ -21,10 +21,10 @@ CREATE EXTERNAL TABLE anaylyze_external (key string, val string) partitioned by
 ALTER TABLE anaylyze_external ADD PARTITION (insertdate='2008-01-01') location 'pfile://${system:test.tmp.dir}/texternal/2008-01-01';
 select count(*) from anaylyze_external where insertdate='2008-01-01';
 -- analyze
-analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics;
-describe formatted anaylyze_external PARTITION (insertdate='2008-01-01');
 analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics noscan;
 describe formatted anaylyze_external PARTITION (insertdate='2008-01-01');
+analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics;
+describe formatted anaylyze_external PARTITION (insertdate='2008-01-01');
 dfs -rmr ${system:test.tmp.dir}/texternal;

 drop table anaylyze_external;
diff --git ql/src/test/results/clientpositive/stats_counter.q.out ql/src/test/results/clientpositive/stats_counter.q.out
index f15d8c5..40d8656 100644
--- ql/src/test/results/clientpositive/stats_counter.q.out
+++ ql/src/test/results/clientpositive/stats_counter.q.out
@@ -1,13 +1,66 @@
-PREHOOK: query: create table dummy as select * from src
+PREHOOK: query: -- by analyze
+create table dummy1 as select * from src
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@src
-POSTHOOK: query: create table dummy as select * from src
+POSTHOOK: query: -- by analyze
+create table dummy1 as select * from src
 POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@src
-POSTHOOK: Output: default@dummy
-PREHOOK: query: desc formatted dummy
+POSTHOOK: Output: default@dummy1
+PREHOOK: query: analyze table dummy1 compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy1
+PREHOOK: Output: default@dummy1
+POSTHOOK: query: analyze table dummy1 compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy1
+POSTHOOK: Output: default@dummy1
+PREHOOK: query: desc formatted dummy1
 PREHOOK: type: DESCTABLE
-POSTHOOK: query: desc formatted dummy
+POSTHOOK: query: desc formatted dummy1
+POSTHOOK: type: DESCTABLE
+# col_name data_type comment
+
+key string None
+value string None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- by autogather
+create table dummy2 as select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: -- by autogather
+create table dummy2 as select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dummy2
+PREHOOK: query: desc formatted dummy2
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted dummy2
 POSTHOOK: type: DESCTABLE
 # col_name data_type comment
diff --git ql/src/test/results/clientpositive/stats_noscan_1.q.out ql/src/test/results/clientpositive/stats_noscan_1.q.out
index 054ca38..6603ce0 100644
--- ql/src/test/results/clientpositive/stats_noscan_1.q.out
+++ ql/src/test/results/clientpositive/stats_noscan_1.q.out
@@ -136,8 +136,8 @@ Protect Mode: None
 Partition Parameters:
 COLUMN_STATS_ACCURATE true
 numFiles 1
-numRows 0
-rawDataSize 0
+numRows -1
+rawDataSize -1
 totalSize 5812
 #### A masked pattern was here ####
@@ -184,8 +184,8 @@ Protect Mode: None
 Partition Parameters:
 COLUMN_STATS_ACCURATE true
 numFiles 1
-numRows 0
-rawDataSize 0
+numRows -1
+rawDataSize -1
 totalSize 5812
 #### A masked pattern was here ####
@@ -512,8 +512,8 @@ Protect Mode: None
 Partition Parameters:
 COLUMN_STATS_ACCURATE true
 numFiles 1
-numRows 0
-rawDataSize 0
+numRows -1
+rawDataSize -1
 totalSize 5812
 #### A masked pattern was here ####
@@ -568,8 +568,8 @@ Protect Mode: None
 Partition Parameters:
 COLUMN_STATS_ACCURATE true
 numFiles 1
-numRows 0
-rawDataSize 0
+numRows -1
+rawDataSize -1
 totalSize 5812
 #### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/stats_noscan_2.q.out ql/src/test/results/clientpositive/stats_noscan_2.q.out
index e55fa94..b17d456 100644
--- ql/src/test/results/clientpositive/stats_noscan_2.q.out
+++ ql/src/test/results/clientpositive/stats_noscan_2.q.out
@@ -21,11 +21,11 @@ POSTHOOK: Input: default@anaylyze_external
 4
 5
 6
-PREHOOK: query: analyze table anaylyze_external compute statistics
+PREHOOK: query: analyze table anaylyze_external compute statistics noscan
 PREHOOK: type: QUERY
 PREHOOK: Input: default@anaylyze_external
 PREHOOK: Output: default@anaylyze_external
-POSTHOOK: query: analyze table anaylyze_external compute statistics
+POSTHOOK: query: analyze table anaylyze_external compute statistics noscan
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@anaylyze_external
 POSTHOOK: Output: default@anaylyze_external
@@ -48,8 +48,8 @@ Table Parameters:
 COLUMN_STATS_ACCURATE true
 EXTERNAL TRUE
 numFiles 0
-numRows 6
-rawDataSize 6
+numRows -1
+rawDataSize -1
 totalSize 0
 #### A masked pattern was here ####
@@ -63,11 +63,11 @@ Bucket Columns: []
 Sort Columns: []
 Storage Desc Params:
 serialization.format 1
-PREHOOK: query: analyze table anaylyze_external compute statistics noscan
+PREHOOK: query: analyze table anaylyze_external compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@anaylyze_external
 PREHOOK: Output: default@anaylyze_external
-POSTHOOK: query: analyze table anaylyze_external compute statistics noscan
+POSTHOOK: query: analyze table anaylyze_external compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@anaylyze_external
 POSTHOOK: Output: default@anaylyze_external
@@ -90,8 +90,8 @@ Table Parameters:
 COLUMN_STATS_ACCURATE true
 EXTERNAL TRUE
 numFiles 0
-numRows 0
-rawDataSize 0
+numRows 6
+rawDataSize 6
 totalSize 0
 #### A masked pattern was here ####
@@ -184,14 +184,14 @@ POSTHOOK: Lineage: texternal PARTITION(insertdate=2008-01-01).key SIMPLE [(src)s
 POSTHOOK: Lineage: texternal PARTITION(insertdate=2008-01-01).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 500
 PREHOOK: query: -- analyze
-analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics
+analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics noscan
 PREHOOK: type: QUERY
 PREHOOK: Input: default@anaylyze_external
 PREHOOK: Input: default@anaylyze_external@insertdate=2008-01-01
 PREHOOK: Output: default@anaylyze_external
 PREHOOK: Output: default@anaylyze_external@insertdate=2008-01-01
 POSTHOOK: query: -- analyze
-analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics
+analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics noscan
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@anaylyze_external
 POSTHOOK: Input: default@anaylyze_external@insertdate=2008-01-01
@@ -225,8 +225,8 @@ Protect Mode: None
 Partition Parameters:
 COLUMN_STATS_ACCURATE true
 numFiles 1
-numRows 500
-rawDataSize 5312
+numRows -1
+rawDataSize -1
 totalSize 5812
 #### A masked pattern was here ####
@@ -240,13 +240,13 @@ Bucket Columns: []
 Sort Columns: []
 Storage Desc Params:
 serialization.format 1
-PREHOOK: query: analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics noscan
+PREHOOK: query: analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@anaylyze_external
 PREHOOK: Input: default@anaylyze_external@insertdate=2008-01-01
 PREHOOK: Output: default@anaylyze_external
 PREHOOK: Output: default@anaylyze_external@insertdate=2008-01-01
-POSTHOOK: query: analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics noscan
+POSTHOOK: query: analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@anaylyze_external
 POSTHOOK: Input: default@anaylyze_external@insertdate=2008-01-01
@@ -280,8 +280,8 @@ Protect Mode: None
 Partition Parameters:
 COLUMN_STATS_ACCURATE true
 numFiles 1
-numRows 0
-rawDataSize 0
+numRows 500
+rawDataSize 5312
 totalSize 5812
 #### A masked pattern was here ####