diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4c3ef3e..7521bef 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1399,7 +1399,7 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "When trying a smaller subset of data for simple LIMIT, how much size we need to guarantee each row to have at least."),
     HIVELIMITOPTLIMITFILE("hive.limit.optimize.limit.file", 10,
         "When trying a smaller subset of data for simple LIMIT, maximum number of files we can sample."),
-    HIVELIMITOPTENABLE("hive.limit.optimize.enable", false,
+    HIVELIMITOPTENABLE("hive.limit.optimize.enable", true,
         "Whether to enable to optimization to trying a smaller subset of data for simple LIMIT first."),
     HIVELIMITOPTMAXFETCH("hive.limit.optimize.fetch.max", 50000,
         "Maximum number of rows allowed for a smaller subset of data for simple LIMIT, if it is a fetch query. \n" +
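Note on the HiveConf hunk above: flipping hive.limit.optimize.enable to true means a plain
"SELECT ... LIMIT n" may now sample a subset of input files instead of scanning the whole
table. A minimal sketch for checking the new defaults through the standard HiveConf
accessors (the class name LimitOptDefaults is illustrative, not part of the patch):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class LimitOptDefaults {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // After this patch the default is true.
        boolean limitOpt = conf.getBoolVar(HiveConf.ConfVars.HIVELIMITOPTENABLE);
        // hive.limit.optimize.fetch.max still caps how large a LIMIT may be
        // before the optimization is abandoned (see the TaskCompiler hunks below).
        int maxFetch = conf.getIntVar(HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        System.out.println("enable=" + limitOpt + ", fetch.max=" + maxFetch);
      }
    }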
diff --git ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
index 650792b..247641d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
+++ ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
@@ -39,7 +39,6 @@
   boolean noScanAnalyzeCommand;
   boolean analyzeRewrite;
   boolean ctas;
-  int outerQueryLimit;
 
   boolean hasJoin = false;
   boolean hasGroupBy = false;
@@ -116,14 +115,6 @@ public void setCTAS(boolean ctas) {
     this.ctas = ctas;
   }
 
-  public int getOuterQueryLimit() {
-    return outerQueryLimit;
-  }
-
-  public void setOuterQueryLimit(int outerQueryLimit) {
-    this.outerQueryLimit = outerQueryLimit;
-  }
-
   public boolean hasJoin() {
     return (noOfJoins > 0);
   }
@@ -282,7 +273,6 @@ public void clear() {
     noScanAnalyzeCommand = false;
     analyzeRewrite = false;
     ctas = false;
-    outerQueryLimit = -1;
 
     hasJoin = false;
     hasGroupBy = false;
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index eb0ba7b..9c696cc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -27,8 +27,6 @@
 import java.util.Map;
 import java.util.Set;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -46,7 +44,6 @@
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
@@ -82,6 +79,8 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Tries to convert simple fetch query to single fetch task, which fetches rows directly
@@ -126,7 +125,8 @@ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator so
         pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION);
 
     boolean aggressive = "more".equals(mode);
-    final int limit = pctx.getQueryProperties().getOuterQueryLimit();
+    final int limit = pctx.getGlobalLimitCtx().isEnable() ?
+        pctx.getGlobalLimitCtx().getGlobalLimit() : -1;
     FetchData fetch = checkTree(aggressive, pctx, alias, source);
     if (fetch != null && checkThreshold(fetch, limit, pctx)) {
       FetchWork fetchWork = fetch.convertToWork();
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index 0ead9be..fd17917 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -46,7 +46,6 @@
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
@@ -149,18 +148,9 @@ private OpAttr clone(Operator... inputs) {
     }
   }
 
-  private void handleTopLimit(Operator rootOp) {
-    if (rootOp instanceof LimitOperator) {
-      // this can happen only on top most limit, not while visiting Limit Operator
-      // since that can be within subquery.
-      this.semanticAnalyzer.getQB().getParseInfo().setOuterQueryLimit(((LimitOperator) rootOp).getConf().getLimit());
-    }
-  }
-
   public Operator convert(RelNode root) throws SemanticException {
     OpAttr opAf = dispatch(root);
     Operator rootOp = opAf.inputs.get(0);
-    handleTopLimit(rootOp);
     return rootOp;
   }
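The three files above stop carrying outerQueryLimit through QueryProperties and the
Calcite translator; consumers now read the limit from GlobalLimitCtx on demand. The
replacement expression, shown in isolation as it appears in SimpleFetchOptimizer.optimize()
(the wrapper class and method below are invented for illustration, not part of the patch):

    import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx;

    final class OuterLimit {
      // When the global limit context is enabled, use its limit;
      // otherwise -1 means "no outer LIMIT was specified".
      static int effectiveOuterLimit(GlobalLimitCtx ctx) {
        return ctx.isEnable() ? ctx.getGlobalLimit() : -1;
      }
    }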
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index 3a0402e..ab1df67 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -106,7 +106,6 @@
   // KEY of SimpleEntry: offset
   // VALUE of SimpleEntry: rowcount
   private final HashMap<String, SimpleEntry<Integer, Integer>> destToLimit;
-  private int outerQueryLimit;
 
   // used by GroupBy
   private final LinkedHashMap<String, LinkedHashMap<String, ASTNode>> destToAggregationExprs;
@@ -147,7 +146,6 @@ public QBParseInfo(String alias, boolean isSubQ) {
     this.alias = alias;
     this.isSubQ = isSubQ;
-    outerQueryLimit = -1;
 
     aliasToLateralViews = new HashMap<String, ArrayList<ASTNode>>();
@@ -459,21 +457,6 @@ public Integer getDestLimitOffset(String dest) {
     return destToLimit.get(dest) == null ? 0 : destToLimit.get(dest).getKey();
   }
 
-  /**
-   * @return the outerQueryLimit
-   */
-  public int getOuterQueryLimit() {
-    return outerQueryLimit;
-  }
-
-  /**
-   * @param outerQueryLimit
-   *          the outerQueryLimit to set
-   */
-  public void setOuterQueryLimit(int outerQueryLimit) {
-    this.outerQueryLimit = outerQueryLimit;
-  }
-
   public boolean isTopLevelSimpleSelectStarQuery() {
     if (alias != null || destToSelExpr.size() != 1 || !isSimpleSelectQuery()) {
       return false;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 747f387..c448fbe 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9469,7 +9469,6 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb,
       curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
           limit.intValue(), extraMRStep);
-      qb.getParseInfo().setOuterQueryLimit(limit.intValue());
     }
     if (!queryState.getHiveOperation().equals(HiveOperation.CREATEVIEW)) {
       curr = genFileSinkPlan(dest, qb, curr);
@@ -13109,7 +13108,6 @@ private void copyInfoToQueryProperties(QueryProperties queryProperties) {
     queryProperties.setCTAS(qb.getTableDesc() != null);
     queryProperties.setHasOuterOrderBy(!qb.getParseInfo().getIsSubQ() &&
         !qb.getParseInfo().getDestToOrderBy().isEmpty());
-    queryProperties.setOuterQueryLimit(qb.getParseInfo().getOuterQueryLimit());
     queryProperties.setMaterializedView(qb.getViewDesc() != null);
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 97cf585..69e7457 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -20,18 +20,12 @@
 import java.io.Serializable;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
-import java.util.LinkedList;
 import java.util.List;
-import java.util.Queue;
 import java.util.Set;
-import java.util.Stack;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.HiveStatsUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -42,7 +36,6 @@
 import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.exec.ColumnStatsTask;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.StatsTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -54,7 +47,6 @@
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
-import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
@@ -76,6 +68,8 @@
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.thrift.ThriftFormatter;
 import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Interner;
 import com.google.common.collect.Interners;
@@ -113,7 +107,7 @@ public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks,
     List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
 
     boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
-    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
+    int outerQueryLimit = globalLimitCtx.isEnable() ? globalLimitCtx.getGlobalLimit() : -1;
 
     if (pCtx.getFetchTask() != null) {
       if (pCtx.getFetchTask().getTblDesc() == null) {
@@ -191,7 +185,7 @@ public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks,
       int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
-      if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
+      if (outerQueryLimit > fetchLimit) {
         LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit
             + ". Doesn't qualify limit optimiztion.");
         globalLimitCtx.disableOpt();
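Net effect in TaskCompiler: the fetch-limit guard derives outerQueryLimit from
GlobalLimitCtx rather than QueryProperties before comparing it against
hive.limit.optimize.fetch.max. A condensed sketch of that guard, assuming conf and
globalLimitCtx are in scope as they are inside compile() (not verbatim from the file):

    int fetchLimit = conf.getIntVar(HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
    int outerQueryLimit = globalLimitCtx.isEnable() ? globalLimitCtx.getGlobalLimit() : -1;
    if (outerQueryLimit > fetchLimit) {
      // The requested LIMIT exceeds the sampling threshold, so the global
      // limit optimization is switched off for this query.
      globalLimitCtx.disableOpt();
    }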