diff --git ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java index 5dab171..e8f7fba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java +++ ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql; + /** * * QueryProperties. @@ -25,16 +26,26 @@ * A structure to contain features of a query that are determined * during parsing and may be useful for categorizing a query type * - * These inlucde whether the query contains: + * These include whether the query contains: * a join clause, a group by clause, an order by clause, a sort by * clause, a group by clause following a join clause, and whether * the query uses a script for mapping/reducing */ public class QueryProperties { + boolean query; + boolean analyzeCommand; + boolean partialScanAnalyzeCommand; + boolean noScanAnalyzeCommand; + boolean analyzeRewrite; + boolean ctas; + boolean insertToTable; + int outerQueryLimit; + boolean hasJoin = false; boolean hasGroupBy = false; boolean hasOrderBy = false; + boolean hasOuterOrderBy = false; boolean hasSortBy = false; boolean hasJoinFollowedByGroupBy = false; boolean hasPTF = false; @@ -51,10 +62,75 @@ private int noOfJoins = 0; private int noOfOuterJoins = 0; private boolean hasLateralViews; - + private boolean multiDestQuery; private boolean filterWithSubQuery; - + + + public boolean isQuery() { + return query; + } + + public void setQuery(boolean query) { + this.query = query; + } + + public boolean isAnalyzeCommand() { + return analyzeCommand; + } + + public void setAnalyzeCommand(boolean analyzeCommand) { + this.analyzeCommand = analyzeCommand; + } + + public boolean isPartialScanAnalyzeCommand() { + return partialScanAnalyzeCommand; + } + + public void setPartialScanAnalyzeCommand(boolean partialScanAnalyzeCommand) { + this.partialScanAnalyzeCommand = partialScanAnalyzeCommand; + } + + public boolean isNoScanAnalyzeCommand() { + return noScanAnalyzeCommand; + } + + public void setNoScanAnalyzeCommand(boolean noScanAnalyzeCommand) { + this.noScanAnalyzeCommand = noScanAnalyzeCommand; + } + + public boolean isAnalyzeRewrite() { + return analyzeRewrite; + } + + public void setAnalyzeRewrite(boolean analyzeRewrite) { + this.analyzeRewrite = analyzeRewrite; + } + + public boolean isCTAS() { + return ctas; + } + + public void setCTAS(boolean ctas) { + this.ctas = ctas; + } + + public boolean isInsertToTable() { + return insertToTable; + } + + public void setInsertToTable(boolean insertToTable) { + this.insertToTable = insertToTable; + } + + public int getOuterQueryLimit() { + return outerQueryLimit; + } + + public void setOuterQueryLimit(int outerQueryLimit) { + this.outerQueryLimit = outerQueryLimit; + } + public boolean hasJoin() { return (noOfJoins > 0); } @@ -97,6 +173,14 @@ public void setHasOrderBy(boolean hasOrderBy) { this.hasOrderBy = hasOrderBy; } + public boolean hasOuterOrderBy() { + return hasOuterOrderBy; + } + + public void setHasOuterOrderBy(boolean hasOuterOrderBy) { + this.hasOuterOrderBy = hasOuterOrderBy; + } + public boolean hasSortBy() { return hasSortBy; } @@ -186,9 +270,19 @@ public boolean hasFilterWithSubQuery() { } public void clear() { + query = false; + analyzeCommand = false; + partialScanAnalyzeCommand = false; + noScanAnalyzeCommand = false; + analyzeRewrite = false; + ctas = false; + insertToTable = false; + outerQueryLimit = -1; + hasJoin = false; hasGroupBy = false; hasOrderBy = false; + hasOuterOrderBy = false; hasSortBy = 
false; hasJoinFollowedByGroupBy = false; hasPTF = false; @@ -204,8 +298,9 @@ public void clear() { noOfJoins = 0; noOfOuterJoins = 0; - + multiDestQuery = false; filterWithSubQuery = false; } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java index 868cf04..cdfb308 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java @@ -43,7 +43,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.shims.ShimLoader; @@ -377,7 +377,7 @@ private String toString(Map parameters) { private List getPartitionsList() throws HiveException { if (work.getTableSpecs() != null) { - tableSpec tblSpec = work.getTableSpecs(); + TableSpec tblSpec = work.getTableSpecs(); table = tblSpec.tableHandle; if (!table.isPartitioned()) { return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index 6922f89..2a8167a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -39,7 +39,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.StatsWork; @@ -370,7 +370,7 @@ private String toString(Map parameters) { if (work.getTableSpecs() != null) { // ANALYZE command - tableSpec tblSpec = work.getTableSpecs(); + TableSpec tblSpec = work.getTableSpecs(); table = tblSpec.tableHandle; if (!table.isPartitioned()) { return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index 1342afe..7b013bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -49,14 +49,13 @@ import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession; import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager; import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl; -import org.apache.hadoop.hive.ql.exec.spark.status.LocalSparkJobMonitor; import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef; import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import 
org.apache.hadoop.hive.ql.plan.LoadTableDesc; @@ -320,7 +319,7 @@ private static StatsTask getStatsTaskInChildTasks(Task r if (work.getTableSpecs() != null) { // ANALYZE command - BaseSemanticAnalyzer.tableSpec tblSpec = work.getTableSpecs(); + TableSpec tblSpec = work.getTableSpecs(); table = tblSpec.tableHandle; if (!table.isPartitioned()) { return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 69a4545..2e5ea41 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -34,8 +34,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.ProtectMode; @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.session.SessionState; @@ -91,6 +92,9 @@ private transient HiveStorageHandler storageHandler; + private transient TableSpec tableSpec; + + /** * Used only for serialization. */ @@ -1012,4 +1016,13 @@ private static String normalize(String colName) throws HiveException { } return colName.toLowerCase(); } + + public TableSpec getTableSpec() { + return tableSpec; + } + + public void setTableSpec(TableSpec tableSpec) { + this.tableSpec = tableSpec; + } + }; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 0ea81ab..c5f03d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -43,7 +43,6 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.parse.QBParseInfo; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -88,8 +87,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, ctx.setCurrAliasId(currAliasId); mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId)); - QBParseInfo parseInfo = parseCtx.getQB().getParseInfo(); - if (parseInfo.isAnalyzeCommand()) { + if (parseCtx.getQueryProperties().isAnalyzeCommand()) { + boolean partialScan = parseCtx.getQueryProperties().isPartialScanAnalyzeCommand(); + boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand(); if (inputFormat.equals(OrcInputFormat.class)) { // For ORC, all the following statements are the same // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS @@ -97,14 +97,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any MR or Tez job above this task - 
StatsNoJobWork snjWork = new StatsNoJobWork(parseCtx.getQB().getParseInfo().getTableSpec()); + StatsNoJobWork snjWork = new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec()); snjWork.setStatsReliable(parseCtx.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); // If partition is specified, get pruned partition list - Set confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo); + Set confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op); if (confirmedParts.size() > 0) { - Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias); - List partCols = GenMapRedUtils.getPartitionColumns(parseInfo); + Table source = op.getConf().getTableMetadata(); + List partCols = GenMapRedUtils.getPartitionColumns(op); PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols, false); snjWork.setPrunedPartitionList(partList); @@ -119,7 +119,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // The plan consists of a simple MapRedTask followed by a StatsTask. // The MR task is just a simple TableScanOperator - StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec()); + StatsWork statsWork = new StatsWork(op.getConf().getTableMetadata().getTableSpec()); statsWork.setAggKey(op.getConf().getStatsAggPrefix()); statsWork.setSourceTask(currTask); statsWork.setStatsReliable(parseCtx.getConf().getBoolVar( @@ -132,7 +132,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only. - if (parseInfo.isNoScanAnalyzeCommand()) { + if (noScan) { statsTask.setParentTasks(null); statsWork.setNoScanAnalyzeCommand(true); ctx.getRootTasks().remove(currTask); @@ -140,8 +140,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; - if (parseInfo.isPartialScanAnalyzeCommand()) { - handlePartialScanCommand(op, ctx, parseCtx, currTask, parseInfo, statsWork, statsTask); + if (partialScan) { + handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, statsTask); } currWork.getMapWork().setGatheringStats(true); @@ -153,10 +153,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // pruned list, // and pass it to setTaskPlan as the last parameter Set confirmedPartns = GenMapRedUtils - .getConfirmedPartitionsForScan(parseInfo); + .getConfirmedPartitionsForScan(op); if (confirmedPartns.size() > 0) { - Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias); - List partCols = GenMapRedUtils.getPartitionColumns(parseInfo); + Table source = op.getConf().getTableMetadata(); + List partCols = GenMapRedUtils.getPartitionColumns(op); PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, partCols, false); GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx, partList); } else { // non-partitioned table @@ -184,12 +184,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, * @throws SemanticException */ private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx, - ParseContext parseCtx, Task currTask, QBParseInfo parseInfo, + ParseContext parseCtx, Task currTask, StatsWork statsWork, Task statsTask) throws SemanticException { String aggregationKey = op.getConf().getStatsAggPrefix(); StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey); - List 
inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, - aggregationKeyBuffer); + List inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer); aggregationKey = aggregationKeyBuffer.toString(); // scan work diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index fad582a..fe03a9b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -76,10 +76,9 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.parse.QBParseInfo; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles; @@ -1466,7 +1465,7 @@ public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask, */ public static boolean isInsertInto(ParseContext parseCtx, FileSinkOperator fsOp) { return fsOp.getConf().getTableInfo().getTableName() != null && - parseCtx.getQB().getParseInfo().isInsertToTable(); + parseCtx.getQueryProperties().isInsertToTable(); } /** @@ -1788,52 +1787,53 @@ public static Path createMoveTask(Task currTask, boolean return dest; } - public static Set getConfirmedPartitionsForScan(QBParseInfo parseInfo) { + public static Set getConfirmedPartitionsForScan(TableScanOperator tableScanOp) { Set confirmedPartns = new HashSet(); - tableSpec tblSpec = parseInfo.getTableSpec(); - if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) { + TableSpec tblSpec = tableScanOp.getConf().getTableMetadata().getTableSpec(); + if (tblSpec.specType == TableSpec.SpecType.STATIC_PARTITION) { // static partition if (tblSpec.partHandle != null) { confirmedPartns.add(tblSpec.partHandle); } else { // partial partition spec has null partHandle - assert parseInfo.isNoScanAnalyzeCommand(); confirmedPartns.addAll(tblSpec.partitions); } - } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) { + } else if (tblSpec.specType == TableSpec.SpecType.DYNAMIC_PARTITION) { // dynamic partition confirmedPartns.addAll(tblSpec.partitions); } return confirmedPartns; } - public static List getPartitionColumns(QBParseInfo parseInfo) { - tableSpec tblSpec = parseInfo.getTableSpec(); + public static List getPartitionColumns(TableScanOperator tableScanOp) { + TableSpec tblSpec = tableScanOp.getConf().getTableMetadata().getTableSpec(); if (tblSpec.tableHandle.isPartitioned()) { return new ArrayList(tblSpec.getPartSpec().keySet()); } return Collections.emptyList(); } - public static List getInputPathsForPartialScan(QBParseInfo parseInfo, StringBuffer aggregationKey) - throws SemanticException { + public static List getInputPathsForPartialScan(TableScanOperator tableScanOp, + StringBuffer aggregationKey) throws SemanticException { List inputPaths = new ArrayList(); - switch (parseInfo.getTableSpec().specType) { - case TABLE_ONLY: - inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath()); - break; - case STATIC_PARTITION: - Partition 
part = parseInfo.getTableSpec().partHandle; - try { - aggregationKey.append(Warehouse.makePartPath(part.getSpec())); - } catch (MetaException e) { - throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg( - part.getDataLocation().toString() + e.getMessage())); - } - inputPaths.add(part.getDataLocation()); - break; - default: - assert false; + switch (tableScanOp.getConf().getTableMetadata().getTableSpec().specType) { + case TABLE_ONLY: + inputPaths.add(tableScanOp.getConf().getTableMetadata() + .getTableSpec().tableHandle.getPath()); + break; + case STATIC_PARTITION: + Partition part = tableScanOp.getConf().getTableMetadata() + .getTableSpec().partHandle; + try { + aggregationKey.append(Warehouse.makePartPath(part.getSpec())); + } catch (MetaException e) { + throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg( + part.getDataLocation().toString() + e.getMessage())); + } + inputPaths.add(part.getDataLocation()); + break; + default: + assert false; } return inputPaths; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java index 31c83f2..39e11a2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java @@ -55,8 +55,8 @@ @Override public ParseContext transform(ParseContext pctx) throws SemanticException { - if (pctx.getFetchTask() != null || !pctx.getQB().getIsQuery() || - pctx.getQB().isAnalyzeRewrite() || pctx.getQB().isCTAS() || + if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery() || + pctx.getQueryProperties().isAnalyzeRewrite() || pctx.getQueryProperties().isCTAS() || pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty()) { return pctx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 986e33b..0328007 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -22,9 +22,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.LinkedHashSet; import java.util.Set; import org.apache.commons.logging.Log; @@ -58,7 +58,6 @@ import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.parse.QB; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -95,12 +94,12 @@ @Override public ParseContext transform(ParseContext pctx) throws SemanticException { Map> topOps = pctx.getTopOps(); - if (pctx.getQB().getIsQuery() && !pctx.getQB().getParseInfo().isAnalyzeCommand() + if (pctx.getQueryProperties().isQuery() && !pctx.getQueryProperties().isAnalyzeCommand() && topOps.size() == 1) { // no join, no groupby, no distinct, no lateral view, no subq, // no CTAS or insert, not analyze command, and single sourced. 
String alias = (String) pctx.getTopOps().keySet().toArray()[0]; - Operator topOp = (Operator) pctx.getTopOps().values().toArray()[0]; + Operator topOp = (Operator) pctx.getTopOps().values().toArray()[0]; if (topOp instanceof TableScanOperator) { try { FetchTask fetchTask = optimize(pctx, alias, (TableScanOperator) topOp); @@ -129,7 +128,7 @@ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator so pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION); boolean aggressive = "more".equals(mode); - final int limit = pctx.getQB().getParseInfo().getOuterQueryLimit(); + final int limit = pctx.getQueryProperties().getOuterQueryLimit(); FetchData fetch = checkTree(aggressive, pctx, alias, source); if (fetch != null && checkThreshold(fetch, limit, pctx)) { FetchWork fetchWork = fetch.convertToWork(); @@ -179,8 +178,7 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, if (!aggressive && splitSample != null) { return null; } - QB qb = pctx.getQB(); - if (!aggressive && qb.hasTableSample(alias)) { + if (!aggressive && ts.getConf().getTableSample() != null) { return null; } Table table = ts.getConf().getTableMetadata(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 0d19be6..7bcb797 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -416,8 +416,7 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, // should honor the ordering of records provided by ORDER BY in SELECT statement ReduceSinkOperator parentRSOp = OperatorUtils.findSingleOperatorUpstream(parent, ReduceSinkOperator.class); - boolean isOrderBy = parseCtx.getQB().getParseInfo().getDestToOrderBy().size() > 0; - if (parentRSOp != null && isOrderBy) { + if (parentRSOp != null && parseCtx.getQueryProperties().hasOuterOrderBy()) { String parentRSOpOrder = parentRSOp.getConf().getOrder(); if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) { newKeyCols.addAll(parentRSOp.getConf().getKeyCols()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index f69b7aa..bc8d8f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -97,8 +97,8 @@ @Override public ParseContext transform(ParseContext pctx) throws SemanticException { - if (pctx.getFetchTask() != null || !pctx.getQB().getIsQuery() || - pctx.getQB().isAnalyzeRewrite() || pctx.getQB().isCTAS() || + if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery() || + pctx.getQueryProperties().isAnalyzeRewrite() || pctx.getQueryProperties().isCTAS() || pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty()) { return pctx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index a0459c4..ab376b9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.Serializable; import java.io.UnsupportedEncodingException; -import java.sql.Date; import java.util.ArrayList; import 
java.util.Arrays; import java.util.HashMap; @@ -42,7 +41,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; @@ -69,7 +67,6 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -696,10 +693,10 @@ private static String getUnionTypeStringFromAST(ASTNode typeNode) } /** - * tableSpec. + * TableSpec. * */ - public static class tableSpec { + public static class TableSpec { public String tableName; public Table tableHandle; public Map partSpec; // has to use LinkedHashMap to enforce order @@ -709,12 +706,12 @@ private static String getUnionTypeStringFromAST(ASTNode typeNode) public static enum SpecType {TABLE_ONLY, STATIC_PARTITION, DYNAMIC_PARTITION}; public SpecType specType; - public tableSpec(Hive db, HiveConf conf, ASTNode ast) + public TableSpec(Hive db, HiveConf conf, ASTNode ast) throws SemanticException { this(db, conf, ast, true, false); } - public tableSpec(Hive db, HiveConf conf, String tableName, Map partSpec) + public TableSpec(Hive db, HiveConf conf, String tableName, Map partSpec) throws HiveException { this.tableName = tableName; this.partSpec = partSpec; @@ -728,7 +725,7 @@ public tableSpec(Hive db, HiveConf conf, String tableName, Map p } } - public tableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec, + public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec, boolean allowPartialPartitionsSpec) throws SemanticException { assert (ast.getToken().getType() == HiveParser.TOK_TAB || ast.getToken().getType() == HiveParser.TOK_TABLE_PARTITION @@ -867,6 +864,47 @@ public String toString() { } } + public class AnalyzeRewriteContext { + + private String tableName; + private List colName; + private List colType; + private boolean tblLvl; + + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public List getColName() { + return colName; + } + + public void setColName(List colName) { + this.colName = colName; + } + + public boolean isTblLvl() { + return tblLvl; + } + + public void setTblLvl(boolean isTblLvl) { + this.tblLvl = isTblLvl; + } + + public List getColType() { + return colType; + } + + public void setColType(List colType) { + this.colType = colType; + } + } + /** * Gets the lineage information. 
* diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 8979b4c..c83523e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -427,10 +427,12 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { qb = getQB(); qb.setAnalyzeRewrite(true); qbp = qb.getParseInfo(); - qbp.setTableName(tbl.getTableName()); - qbp.setTblLvl(isTableLevel); - qbp.setColName(colNames); - qbp.setColType(colType); + analyzeRewrite = new AnalyzeRewriteContext(); + analyzeRewrite.setTableName(tbl.getTableName()); + analyzeRewrite.setTblLvl(isTableLevel); + analyzeRewrite.setColName(colNames); + analyzeRewrite.setColType(colType); + qbp.setAnalyzeRewrite(analyzeRewrite); initCtx(ctx); LOG.info("Invoking analyze on rewritten query"); analyzeInternal(rewrittenTree); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 7ac0551..e01c771 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -997,7 +997,7 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException { StatsWork statDesc; if (oldTblPartLoc.equals(newTblPartLoc)) { // If we're merging to the same location, we can avoid some metastore calls - tableSpec tablepart = new tableSpec(this.db, conf, root); + TableSpec tablepart = new TableSpec(this.db, conf, root); statDesc = new StatsWork(tablepart); } else { statDesc = new StatsWork(ltd); @@ -1618,7 +1618,7 @@ private void analyzeAlterTablePartMergeFiles(ASTNode ast, StatsWork statDesc; if (oldTblPartLoc.equals(newTblPartLoc)) { // If we're merging to the same location, we can avoid some metastore calls - tableSpec tablepart = new tableSpec(db, conf, tableName, partSpec); + TableSpec tablepart = new TableSpec(db, conf, tableName, partSpec); statDesc = new StatsWork(tablepart); } else { statDesc = new StatsWork(ltd); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java index e5e6a55..fd084d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java @@ -18,13 +18,10 @@ package org.apache.hadoop.hive.ql.parse; -import org.apache.hadoop.hive.ql.metadata.PartitionIterable; - import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.net.URI; -import java.util.List; import org.antlr.runtime.tree.Tree; import org.apache.hadoop.fs.FileStatus; @@ -37,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.PartitionIterable; import org.apache.hadoop.hive.ql.plan.CopyWork; /** @@ -59,7 +57,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { URI toURI = EximUtil.getValidatedURI(conf, tmpPath); // initialize source table/partition - tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree, false, true); + TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree, false, true); EximUtil.validateTable(ts.tableHandle); try { FileSystem fs = 
FileSystem.get(toURI, conf); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 7efad0c..1a9b42b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -199,7 +199,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } // initialize destination table/partition - tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree); + TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree); if (ts.tableHandle.isOffline()){ throw new SemanticException( diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 6c1ab07..a7a1340 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -42,6 +42,8 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; +import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; @@ -61,7 +63,7 @@ **/ public class ParseContext { - private QB qb; + private HashMap opToPartPruner; private HashMap opToPartList; private HashMap opToSamplePruner; @@ -98,10 +100,14 @@ private FetchTask fetchTask; private QueryProperties queryProperties; - private TableDesc fetchTabledesc; + private TableDesc fetchTableDesc; private Operator fetchSource; private ListSinkOperator fetchSink; + private AnalyzeRewriteContext analyzeRewrite; + private CreateTableDesc createTableDesc; + + public ParseContext() { } @@ -142,7 +148,6 @@ public ParseContext() { */ public ParseContext( HiveConf conf, - QB qb, HashMap opToPartPruner, HashMap opToPartList, HashMap> topOps, @@ -159,9 +164,9 @@ public ParseContext( Map> opToPartToSkewedPruner, Map viewAliasToInput, List reduceSinkOperatorsAddedByEnforceBucketingSorting, + AnalyzeRewriteContext analyzeRewrite, CreateTableDesc createTableDesc, QueryProperties queryProperties) { this.conf = conf; - this.qb = qb; this.opToPartPruner = opToPartPruner; this.opToPartList = opToPartList; this.joinOps = joinOps; @@ -184,25 +189,12 @@ public ParseContext( this.viewAliasToInput = viewAliasToInput; this.reduceSinkOperatorsAddedByEnforceBucketingSorting = reduceSinkOperatorsAddedByEnforceBucketingSorting; + this.analyzeRewrite = analyzeRewrite; + this.createTableDesc = createTableDesc; this.queryProperties = queryProperties; } /** - * @return the qb - */ - public QB getQB() { - return qb; - } - - /** - * @param qb - * the qb to set - */ - public void setQB(QB qb) { - this.qb = qb; - } - - /** * @return the context */ public Context getContext() { @@ -499,12 +491,12 @@ public void setQueryProperties(QueryProperties queryProperties) { this.queryProperties = queryProperties; } - public TableDesc getFetchTabledesc() { - return fetchTabledesc; + public TableDesc getFetchTableDesc() { + return fetchTableDesc; } - public void setFetchTabledesc(TableDesc fetchTabledesc) { - this.fetchTabledesc = fetchTabledesc; + public void setFetchTabledesc(TableDesc fetchTableDesc) { + this.fetchTableDesc = fetchTableDesc; } public 
Operator getFetchSource() { @@ -522,4 +514,21 @@ public ListSinkOperator getFetchSink() { public void setFetchSink(ListSinkOperator fetchSink) { this.fetchSink = fetchSink; } + + public AnalyzeRewriteContext getAnalyzeRewrite() { + return this.analyzeRewrite; + } + + public void setAnalyzeRewrite(AnalyzeRewriteContext analyzeRewrite) { + this.analyzeRewrite = analyzeRewrite; + } + + public CreateTableDesc getCreateTable() { + return this.createTableDesc; + } + + public void setCreateTable(CreateTableDesc createTableDesc) { + this.createTableDesc = createTableDesc; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java index 61592c1..7108a47 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.parse; -import java.lang.StringBuffer; import java.util.List; import java.util.Set; import java.util.Stack; @@ -41,8 +40,8 @@ import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.mapred.InputFormat; /** @@ -77,10 +76,8 @@ public Object process(Node nd, Stack stack, ParseContext parseContext = context.parseContext; Class inputFormat = tableScan.getConf().getTableMetadata() .getInputFormatClass(); - QB queryBlock = parseContext.getQB(); - QBParseInfo parseInfo = parseContext.getQB().getParseInfo(); - - if (parseInfo.isAnalyzeCommand()) { + + if (parseContext.getQueryProperties().isAnalyzeCommand()) { assert tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0; @@ -102,14 +99,14 @@ public Object process(Node nd, Stack stack, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any Tez job above this task - StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec()); + StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); // If partition is specified, get pruned partition list - Set confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo); + Set confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan); if (confirmedParts.size() > 0) { - Table source = parseContext.getQB().getMetaData().getTableForAlias(alias); - List partCols = GenMapRedUtils.getPartitionColumns(parseInfo); + Table source = tableScan.getConf().getTableMetadata(); + List partCols = GenMapRedUtils.getPartitionColumns(tableScan); PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols, false); snjWork.setPrunedPartitionList(partList); @@ -124,8 +121,8 @@ public Object process(Node nd, Stack stack, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS; // The plan consists of a simple TezTask followed by a StatsTask. 
// The Tez task is just a simple TableScanOperator - - StatsWork statsWork = new StatsWork(parseInfo.getTableSpec()); + + StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); statsWork.setSourceTask(context.currentTask); statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); @@ -134,7 +131,7 @@ public Object process(Node nd, Stack stack, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only. - if (parseInfo.isNoScanAnalyzeCommand()) { + if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); statsWork.setNoScanAnalyzeCommand(true); context.rootTasks.remove(context.currentTask); @@ -142,17 +139,17 @@ public Object process(Node nd, Stack stack, } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; - if (parseInfo.isPartialScanAnalyzeCommand()) { - handlePartialScanCommand(tableScan, parseContext, parseInfo, statsWork, context, statsTask); + if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { + handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask); } // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, // and pass it to setTaskPlan as the last parameter - Set confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo); + Set confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan); PrunedPartitionList partitions = null; if (confirmedPartns.size() > 0) { - Table source = queryBlock.getMetaData().getTableForAlias(alias); - List partCols = GenMapRedUtils.getPartitionColumns(parseInfo); + Table source = tableScan.getConf().getTableMetadata(); + List partCols = GenMapRedUtils.getPartitionColumns(tableScan); partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false); } @@ -172,14 +169,14 @@ public Object process(Node nd, Stack stack, * It is composed of PartialScanTask followed by StatsTask. 
*/ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, - QBParseInfo parseInfo, StatsWork statsWork, GenTezProcContext context, - Task statsTask) throws SemanticException { + StatsWork statsWork, GenTezProcContext context, Task statsTask) + throws SemanticException { String aggregationKey = tableScan.getConf().getStatsAggPrefix(); StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey); - List inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKeyBuffer); + List inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer); aggregationKey = aggregationKeyBuffer.toString(); - + // scan work PartialScanWork scanWork = new PartialScanWork(inputPaths); scanWork.setMapperCannotSpanPartns(true); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 3e51188..015e213 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -30,7 +30,8 @@ import org.antlr.runtime.tree.Tree; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; /** * Implementation of the parse information related to a query block. @@ -61,12 +62,9 @@ private boolean isPartialScanAnalyzeCommand; // used for the analyze command (statistics) // (partialscan) - private final HashMap tableSpecs; // used for statistics + private final HashMap tableSpecs; // used for statistics - private String tableName; // used for column statistics - private List colName; // used for column statistics - private List colType; // used for column statistics - private boolean isTblLvl; // used for column statistics + private AnalyzeRewriteContext analyzeRewrite; /** @@ -138,7 +136,7 @@ public QBParseInfo(String alias, boolean isSubQ) { aliasToLateralViews = new HashMap>(); - tableSpecs = new HashMap(); + tableSpecs = new HashMap(); } @@ -544,18 +542,18 @@ public boolean isInsertToTable() { return isInsertToTable; } - public void addTableSpec(String tName, tableSpec tSpec) { + public void addTableSpec(String tName, TableSpec tSpec) { tableSpecs.put(tName, tSpec); } - public tableSpec getTableSpec(String tName) { + public TableSpec getTableSpec(String tName) { return tableSpecs.get(tName); } /** * This method is used only for the analyze command to get the partition specs */ - public tableSpec getTableSpec() { + public TableSpec getTableSpec() { Iterator tName = tableSpecs.keySet().iterator(); return tableSpecs.get(tName.next()); @@ -588,36 +586,12 @@ public tableSpec getTableSpec() { SORT_BY_CLAUSE } - public String getTableName() { - return tableName; + public AnalyzeRewriteContext getAnalyzeRewrite() { + return analyzeRewrite; } - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public List getColName() { - return colName; - } - - public void setColName(List colName) { - this.colName = colName; - } - - public boolean isTblLvl() { - return isTblLvl; - } - - public void setTblLvl(boolean isTblLvl) { - this.isTblLvl = isTblLvl; - } - - public List getColType() { - return colType; - } - - public void setColType(List colType) { - this.colType = colType; + public void 
setAnalyzeRewrite(AnalyzeRewriteContext analyzeRewrite) { + this.analyzeRewrite = analyzeRewrite; } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index bdb9204..6e85f9d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -116,7 +116,7 @@ import org.apache.hadoop.hive.ql.optimizer.Optimizer; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType; import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; @@ -290,6 +290,9 @@ /** Not thread-safe. */ final ASTSearcher astSearcher = new ASTSearcher(); + + protected AnalyzeRewriteContext analyzeRewrite; + private CreateTableDesc tableDesc; static class Phase1Ctx { String dest; @@ -388,21 +391,22 @@ public void initParseCtx(ParseContext pctx) { idToTableNameMap = pctx.getIdToTableNameMap(); uCtx = pctx.getUCtx(); listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); - qb = pctx.getQB(); prunedPartitions = pctx.getPrunedPartitions(); fetchTask = pctx.getFetchTask(); setLineageInfo(pctx.getLineageInfo()); } public ParseContext getParseContext() { - return new ParseContext(conf, qb, opToPartPruner, opToPartList, topOps, + // Make sure the basic query properties are initialized + copyInfoToQueryProperties(queryProperties); + return new ParseContext(conf, opToPartPruner, opToPartList, topOps, new HashSet(joinContext.keySet()), new HashSet(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, - opToPartToSkewedPruner, viewAliasToInput, - reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); + opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, + analyzeRewrite, tableDesc, queryProperties); } @SuppressWarnings("nls") @@ -645,12 +649,11 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { (ASTNode) tabref.getChild(0), ErrorMsg.SAMPLE_RESTRICTION.getMsg())); } - qb.getParseInfo().setTabSample( - alias, - new TableSample( + TableSample tabSample = new TableSample( unescapeIdentifier(sampleClause.getChild(0).getText()), unescapeIdentifier(sampleClause.getChild(1).getText()), - sampleCols)); + sampleCols); + qb.getParseInfo().setTabSample(alias, tabSample); if (unparseTranslator.isEnabled()) { for (ASTNode sampleCol : sampleCols) { unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol @@ -1536,7 +1539,7 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException if (qb.getParseInfo().isAnalyzeCommand()) { // allow partial partition specification for nonscan since noscan is fast. 
- tableSpec ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan); + TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan); if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions try { ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec); @@ -1568,6 +1571,7 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException } } + tab.setTableSpec(ts); qb.getParseInfo().addTableSpec(alias, ts); } @@ -1609,7 +1613,7 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException ASTNode ast = qbp.getDestForClause(name); switch (ast.getToken().getType()) { case HiveParser.TOK_TAB: { - tableSpec ts = new tableSpec(db, conf, ast); + TableSpec ts = new TableSpec(db, conf, ast); if (ts.tableHandle.isView()) { throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg()); } @@ -1621,7 +1625,7 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException .getMsg(ast, "The class is " + outputFormatClass.toString())); } - // tableSpec ts is got from the query (user specified), + // TableSpec ts is got from the query (user specified), // which means the user didn't specify partitions in their query, // but whether the table itself is partitioned is not know. if (ts.specType != SpecType.STATIC_PARTITION) { @@ -1674,7 +1678,7 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException "Error creating temporary folder on: " + location.toString()), e); } if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - tableSpec ts = new tableSpec(db, conf, this.ast); + TableSpec ts = new TableSpec(db, conf, this.ast); // Set that variable to automatically collect stats during the MapReduce job qb.getParseInfo().setIsInsertToTable(true); // Add the table spec for the destination table. 
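A note on the new lookup path, since several other hunks in this patch rely on it: the change above stores the analyze-time TableSpec on the Table itself (tab.setTableSpec(ts)), so downstream planners can resolve it from the operator tree instead of reaching back into the QB. A minimal sketch, assuming a TableScanOperator variable named tableScan (the helper and variable names are illustrative only; the call chain is the one this patch introduces):

    import org.apache.hadoop.hive.ql.exec.TableScanOperator;
    import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;

    // Illustrative helper, not part of the patch: the spec now travels with the table metadata.
    static TableSpec specForScan(TableScanOperator tableScan) {
      // old path (removed by this patch): parseCtx.getQB().getParseInfo().getTableSpec()
      return tableScan.getConf().getTableMetadata().getTableSpec();
    }

This is the same chain used elsewhere in the patch by GenMapRedUtils.getConfirmedPartitionsForScan and the Tez/Spark ProcessAnalyzeTable rules.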
@@ -9133,9 +9137,11 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { } // check if this table is sampled and needs more than input pruning - Operator tableOp = top; + Operator op = top; TableSample ts = qb.getParseInfo().getTabSample(alias); if (ts != null) { + TableScanOperator tableScanOp = (TableScanOperator) top; + tableScanOp.getConf().setTableSample(ts); int num = ts.getNumerator(); int den = ts.getDenominator(); ArrayList sampleExprs = ts.getExprs(); @@ -9194,7 +9200,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { LOG.info("No need for sample filter"); ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory.getAndMakeChild(new FilterDesc( + op = OperatorFactory.getAndMakeChild(new FilterDesc( samplePredicate, true, new sampleDesc(ts.getNumerator(), ts .getDenominator(), tabBucketCols, true)), new RowSchema(rwsch.getColumnInfos()), top); @@ -9204,7 +9210,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { LOG.info("Need sample filter"); ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory.getAndMakeChild(new FilterDesc( + op = OperatorFactory.getAndMakeChild(new FilterDesc( samplePredicate, true), new RowSchema(rwsch.getColumnInfos()), top); } @@ -9234,7 +9240,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { qb.getParseInfo().setTabSample(alias, tsSample); ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory + op = OperatorFactory .getAndMakeChild(new FilterDesc(samplePred, true, new sampleDesc(tsSample.getNumerator(), tsSample .getDenominator(), tab.getBucketCols(), true)), @@ -9252,7 +9258,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { .valueOf(460476415))); ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc); - tableOp = OperatorFactory.getAndMakeChild(new FilterDesc( + op = OperatorFactory.getAndMakeChild(new FilterDesc( samplePred, true), new RowSchema(rwsch.getColumnInfos()), top); } @@ -9260,10 +9266,10 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { } } - Operator output = putOpInsertMap(tableOp, rwsch); + Operator output = putOpInsertMap(op, rwsch); if (LOG.isDebugEnabled()) { - LOG.debug("Created Table Plan for " + alias + " " + tableOp.toString()); + LOG.debug("Created Table Plan for " + alias + " " + op.toString()); } return output; @@ -9308,7 +9314,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String tsDesc.addVirtualCols(vcList); String tblName = tab.getTableName(); - tableSpec tblSpec = qbp.getTableSpec(alias); + TableSpec tblSpec = qbp.getTableSpec(alias); Map partSpec = tblSpec.getPartSpec(); if (partSpec != null) { @@ -9819,14 +9825,15 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce } // 4. 
Generate Parse Context for Optimizer & Physical compiler - ParseContext pCtx = new ParseContext(conf, qb, - opToPartPruner, opToPartList, topOps, + copyInfoToQueryProperties(queryProperties); + ParseContext pCtx = new ParseContext(conf, opToPartPruner, opToPartList, topOps, new HashSet(joinContext.keySet()), new HashSet(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, - viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); + viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, + analyzeRewrite, tableDesc, queryProperties); // 5. Take care of view creation if (createVwDesc != null) { @@ -10597,13 +10604,12 @@ ASTNode analyzeCreateTable( } // Handle different types of CREATE TABLE command - CreateTableDesc crtTblDesc = null; switch (command_type) { case CREATE_TABLE: // REGULAR CREATE TABLE DDL tblProps = addDefaultProperties(tblProps); - crtTblDesc = new CreateTableDesc(dbDotTab, isExt, isTemporary, cols, partCols, + CreateTableDesc crtTblDesc = new CreateTableDesc(dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, @@ -10655,16 +10661,16 @@ ASTNode analyzeCreateTable( tblProps = addDefaultProperties(tblProps); - crtTblDesc = new CreateTableDesc(qualifiedTabName[0], dbDotTab, isExt, isTemporary, cols, + tableDesc = new CreateTableDesc(qualifiedTabName[0], dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues); - crtTblDesc.setStoredAsSubDirectories(storedAsDirs); - crtTblDesc.setNullFormat(rowFormatParams.nullFormat); - qb.setTableDesc(crtTblDesc); + tableDesc.setStoredAsSubDirectories(storedAsDirs); + tableDesc.setNullFormat(rowFormatParams.nullFormat); + qb.setTableDesc(tableDesc); SessionState.get().setCommandType(HiveOperation.CREATETABLE_AS_SELECT); @@ -11872,4 +11878,19 @@ private static ASTNode buildSelExprSubTree(String tableAlias, String col) { selexpr.addChild(dot); return selexpr; } + + private void copyInfoToQueryProperties(QueryProperties queryProperties) { + if (qb != null) { + queryProperties.setQuery(qb.getIsQuery()); + queryProperties.setAnalyzeCommand(qb.getParseInfo().isAnalyzeCommand()); + queryProperties.setPartialScanAnalyzeCommand(qb.getParseInfo().isPartialScanAnalyzeCommand()); + queryProperties.setNoScanAnalyzeCommand(qb.getParseInfo().isNoScanAnalyzeCommand()); + queryProperties.setAnalyzeRewrite(qb.isAnalyzeRewrite()); + queryProperties.setCTAS(qb.getTableDesc() != null); + queryProperties.setInsertToTable(qb.getParseInfo().isInsertToTable()); + queryProperties.setHasOuterOrderBy(!qb.getParseInfo().getIsSubQ() && + !qb.getParseInfo().getDestToOrderBy().isEmpty()); + queryProperties.setOuterQueryLimit(qb.getParseInfo().getOuterQueryLimit()); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 
ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 6ef09cd..b4d5382 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; @@ -86,13 +87,13 @@ public void compile(final ParseContext pCtx, final List> mvTask = new ArrayList>(); List loadTableWork = pCtx.getLoadTableWork(); List loadFileWork = pCtx.getLoadFileWork(); - boolean isCStats = qb.isAnalyzeRewrite(); + boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite(); + int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit(); if (pCtx.getFetchTask() != null) { return; @@ -105,7 +106,7 @@ public void compile(final ParseContext pCtx, final List loadTableWork, - List loadFileWork, List> rootTasks) { - QBParseInfo qbParseInfo = qb.getParseInfo(); + protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, List loadTableWork, + List loadFileWork, List> rootTasks, int outerQueryLimit) { ColumnStatsTask cStatsTask = null; ColumnStatsWork cStatsWork = null; FetchWork fetch = null; - String tableName = qbParseInfo.getTableName(); - List colName = qbParseInfo.getColName(); - List colType = qbParseInfo.getColType(); - boolean isTblLevel = qbParseInfo.isTblLvl(); + String tableName = analyzeRewrite.getTableName(); + List colName = analyzeRewrite.getColName(); + List colType = analyzeRewrite.getColType(); + boolean isTblLevel = analyzeRewrite.isTblLvl(); String cols = loadFileWork.get(0).getColumns(); String colTypes = loadFileWork.get(0).getColumnTypes(); @@ -322,8 +323,7 @@ protected void genColumnStatsTask(QB qb, List loadTableWork, String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); - fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), - resultTab, qb.getParseInfo().getOuterQueryLimit()); + fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit); ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel); @@ -390,7 +390,6 @@ protected abstract void generateTaskTree(List> root */ public ParseContext getParseContext(ParseContext pCtx, List> rootTasks) { ParseContext clone = new ParseContext(conf, - pCtx.getQB(), pCtx.getOpToPartPruner(), pCtx.getOpToPartList(), pCtx.getTopOps(), pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(), pCtx.getLoadTableWork(), pCtx.getLoadFileWork(), pCtx.getContext(), @@ -400,7 +399,7 @@ public ParseContext getParseContext(ParseContext pCtx, List stack, @SuppressWarnings("rawtypes") Class inputFormat = tableScan.getConf().getTableMetadata() .getInputFormatClass(); - QB queryBlock = parseContext.getQB(); - QBParseInfo parseInfo = parseContext.getQB().getParseInfo(); - if (parseInfo.isAnalyzeCommand()) { + if (parseContext.getQueryProperties().isAnalyzeCommand()) { Preconditions.checkArgument(tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0, "AssertionError: expected tableScan.getChildOperators() to be null, " @@ -102,14 +98,14 @@ public Object 
     Preconditions.checkArgument(alias != null,
         "AssertionError: expected alias to be not null");
 
     SparkWork sparkWork = context.currentTask.getWork();
-    boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
-    boolean noScan = parseInfo.isNoScanAnalyzeCommand();
+    boolean partialScan = parseContext.getQueryProperties().isPartialScanAnalyzeCommand();
+    boolean noScan = parseContext.getQueryProperties().isNoScanAnalyzeCommand();
     if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {
       // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
       // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
       // There will not be any Spark job above this task
-      StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec());
+      StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
       snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
           HiveConf.ConfVars.HIVE_STATS_RELIABLE));
       Task snjTask = TaskFactory.get(snjWork, parseContext.getConf());
@@ -123,7 +119,7 @@ public Object process(Node nd, Stack<Node> stack,
       // The plan consists of a simple SparkTask followed by a StatsTask.
       // The Spark task is just a simple TableScanOperator
-      StatsWork statsWork = new StatsWork(parseInfo.getTableSpec());
+      StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
       statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
       statsWork.setSourceTask(context.currentTask);
       statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
@@ -132,7 +128,7 @@ public Object process(Node nd, Stack<Node> stack,
       // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
       // The plan consists of a StatsTask only.
-      if (parseInfo.isNoScanAnalyzeCommand()) {
+      if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) {
         statsTask.setParentTasks(null);
         statsWork.setNoScanAnalyzeCommand(true);
         context.rootTasks.remove(context.currentTask);
@@ -140,17 +136,17 @@ public Object process(Node nd, Stack<Node> stack,
       }
 
       // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
-      if (parseInfo.isPartialScanAnalyzeCommand()) {
-        handlePartialScanCommand(tableScan, parseContext, parseInfo, statsWork, context, statsTask);
+      if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) {
+        handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask);
       }
 
       // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
       // and pass it to setTaskPlan as the last parameter
-      Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+      Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
       PrunedPartitionList partitions = null;
       if (confirmedPartns.size() > 0) {
-        Table source = queryBlock.getMetaData().getTableForAlias(alias);
-        List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo);
+        Table source = tableScan.getConf().getTableMetadata();
+        List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
         partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false);
       }
 
@@ -169,11 +165,11 @@ public Object process(Node nd, Stack<Node> stack,
    * It is composed of PartialScanTask followed by StatsTask.
    */
   private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext,
-      QBParseInfo parseInfo, StatsWork statsWork, GenSparkProcContext context,
-      Task statsTask) throws SemanticException {
+      StatsWork statsWork, GenSparkProcContext context, Task statsTask)
+      throws SemanticException {
     String aggregationKey = tableScan.getConf().getStatsAggPrefix();
     StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
-    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKeyBuffer);
+    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
     aggregationKey = aggregationKeyBuffer.toString();
 
     // scan work
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
index 3e5a607..7e3f0bf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
@@ -20,9 +20,10 @@
 import java.io.Serializable;
 
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+
 
 /**
  * Client-side stats aggregator task.
  */
@@ -30,14 +31,14 @@
 public class StatsNoJobWork implements Serializable {
   private static final long serialVersionUID = 1L;
 
-  private tableSpec tableSpecs;
+  private TableSpec tableSpecs;
   private boolean statsReliable;
   private PrunedPartitionList prunedPartitionList;
 
   public StatsNoJobWork() {
   }
 
-  public StatsNoJobWork(tableSpec tableSpecs) {
+  public StatsNoJobWork(TableSpec tableSpecs) {
     this.tableSpecs = tableSpecs;
   }
@@ -45,7 +46,7 @@
   public StatsNoJobWork(boolean statsReliable) {
     this.statsReliable = statsReliable;
   }
 
-  public tableSpec getTableSpecs() {
+  public TableSpec getTableSpecs() {
     return tableSpecs;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java
index 66d4d4a..3cf0f7f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java
@@ -21,7 +21,7 @@
 import java.io.Serializable;
 
 import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
 
 /**
  * ConditionalStats.
@@ -31,7 +31,7 @@
 public class StatsWork implements Serializable {
   private static final long serialVersionUID = 1L;
 
-  private tableSpec tableSpecs; // source table spec -- for TableScanOperator
+  private TableSpec tableSpecs; // source table spec -- for TableScanOperator
   private LoadTableDesc loadTableDesc; // same as MoveWork.loadTableDesc -- for FileSinkOperator
   private LoadFileDesc loadFileDesc; // same as MoveWork.loadFileDesc -- for FileSinkOperator
   private String aggKey; // aggregation key prefix
@@ -58,7 +58,7 @@
   public StatsWork() {
   }
 
-  public StatsWork(tableSpec tableSpecs) {
+  public StatsWork(TableSpec tableSpecs) {
     this.tableSpecs = tableSpecs;
   }
@@ -74,7 +74,7 @@
   public StatsWork(boolean statsReliable) {
     this.statsReliable = statsReliable;
   }
 
-  public tableSpec getTableSpecs() {
+  public TableSpec getTableSpecs() {
     return tableSpecs;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 0e85990..6530c37 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.hive.ql.exec.PTFUtils;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.parse.TableSample;
 
 /**
  * Table Scan Descriptor Currently, data is only read from a base source as part
@@ -91,9 +92,11 @@
   // input file name (big) to bucket number
   private Map<String, Integer> bucketFileNameMapping;
-
+
   private boolean isMetadataOnly = false;
 
+  private transient TableSample tableSample;
+
   private transient final Table tableMetadata;
@@ -268,4 +271,12 @@ public boolean getIsMetadataOnly() {
   public Table getTableMetadata() {
     return tableMetadata;
   }
+
+  public TableSample getTableSample() {
+    return tableSample;
+  }
+
+  public void setTableSample(TableSample tableSample) {
+    this.tableSample = tableSample;
+  }
 }