diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 0f7da53..e0bec33 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -695,4 +695,8 @@ public void setAcidOperation(AcidUtils.Operation op) {
   public AcidUtils.Operation getAcidOperation() {
     return acidOperation;
   }
+
+  public boolean isFirstOrLast() {
+    return tryCount == 0 || tryCount == Integer.MAX_VALUE;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 9d5730d..e56d3cc 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -442,6 +442,11 @@ public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
       return;
     }
 
+    int rowLimit = scanDesc.getRowLimit();
+    if (rowLimit > 0) {
+      ColumnProjectionUtils.setRowLimit(jobConf, rowLimit);
+    }
+
     // construct column name list and types for reference by filter push down
     Utilities.setColumnNameList(jobConf, tableScan);
     Utilities.setColumnTypeList(jobConf, tableScan);
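The two hunks above wire the limit through compilation: pushFilters() now copies any positive row limit from the TableScanDesc into the JobConf, where record readers can pick it up. A minimal sketch of that round trip, using only the helpers added by this patch (the standalone class and main method are illustrative, not part of the change):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    public class RowLimitRoundTrip {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Compile side (pushFilters): publish the limit for this scan.
        ColumnProjectionUtils.setRowLimit(conf, 4);
        // Read side (a record reader): -1 means no limit was pushed.
        int limit = ColumnProjectionUtils.getRowLimit(conf);
        System.out.println(limit); // prints 4
      }
    }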
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
index c0a2592..505f395 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
@@ -24,6 +24,7 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -77,7 +78,7 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
 
     // The query only qualifies when there are only one top operator
     // and there is no transformer or UDTF and no block sampling
    // is used.
-    if (ctx.getTryCount() == 0 && topOps.size() == 1
+    if (ctx.isFirstOrLast() && topOps.size() == 1
         && !globalLimitCtx.ifHasTransformOrUDTF() &&
         nameToSplitSample.isEmpty()) {
@@ -104,7 +105,10 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
 
       Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
       if (filterOps.size() == 0) {
-        globalLimitCtx.enableOpt(tempGlobalLimit);
+        if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVELIMITOPTENABLE)) {
+          globalLimitCtx.enableOpt(tempGlobalLimit);
+        }
+        ts.getConf().setRowLimit(tempGlobalLimit);
       }
     } else {
       // check if the pruner only contains partition columns
@@ -125,7 +129,10 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
         // If there is any unknown partition, create a map-reduce job for
         // the filter to prune correctly
         if (!partsList.hasUnknownPartitions()) {
-          globalLimitCtx.enableOpt(tempGlobalLimit);
+          if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVELIMITOPTENABLE)) {
+            globalLimitCtx.enableOpt(tempGlobalLimit);
+          }
+          ts.getConf().setRowLimit(tempGlobalLimit);
         }
       }
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 3482a47..49accd9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -142,9 +142,7 @@ public void initialize(HiveConf hiveConf) {
     if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)) {
       transformations.add(new IdentityProjectRemover());
     }
-    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) {
-      transformations.add(new GlobalLimitOptimizer());
-    }
+    transformations.add(new GlobalLimitOptimizer());
     if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCORRELATION) &&
         !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
         !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME) &&
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index 3e51188..a276365 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -569,6 +569,10 @@ public tableSpec getTableSpec() {
     return destToAggregationExprs;
   }
 
+  public LinkedHashMap<String, LinkedHashMap<String, ASTNode>> getDestToWindowingExprs() {
+    return destToWindowingExprs;
+  }
+
   public HashMap<String, List<ASTNode>> getDestToDistinctFuncExprs() {
     return destToDistinctFuncExprs;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 0e85990..0113348 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -236,8 +236,10 @@ public void setMaxStatsKeyPrefixLength(int maxStatsKeyPrefixLength) {
     this.maxStatsKeyPrefixLength = maxStatsKeyPrefixLength;
   }
 
-  public void setRowLimit(int rowLimit) {
-    this.rowLimit = rowLimit;
+  public void setRowLimit(int newLimit) {
+    if (rowLimit < 0 || newLimit < rowLimit) {
+      rowLimit = newLimit;
+    }
   }
 
   public int getRowLimit() {
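Two behavioral points in the hunks above are easy to miss. First, GlobalLimitOptimizer now always runs (Optimizer.java no longer gates it on HIVELIMITOPTENABLE); the flag only controls whether the whole-query limit optimization is enabled, while the per-split row limit is set on the TableScanDesc in either case. Second, TableScanDesc.setRowLimit() only ever tightens the limit. A standalone reproduction of that keep-minimum logic (the class is illustrative; the field's initial value of -1, meaning "unset", is an assumption consistent with getRowLimit's -1 default in ColumnProjectionUtils):

    public class RowLimitHolder {
      private int rowLimit = -1; // assumed initial value: negative means no limit set yet

      public void setRowLimit(int newLimit) {
        // First limit wins; later calls can only shrink it.
        if (rowLimit < 0 || newLimit < rowLimit) {
          rowLimit = newLimit;
        }
      }

      public static void main(String[] args) {
        RowLimitHolder h = new RowLimitHolder();
        h.setRowLimit(16);  // accepted: no limit yet
        h.setRowLimit(4);   // accepted: tighter than 16
        h.setRowLimit(100); // ignored: looser than current limit
        System.out.println(h.rowLimit); // prints 4
      }
    }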
diff --git a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
index 5f8b6f8..2dcb9a3 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
@@ -99,6 +99,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
+          Row Limit Per Split: 4
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
@@ -127,6 +128,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
+          Row Limit Per Split: 16
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
index a4c8c41..1caf28c 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
@@ -821,6 +821,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypes_orc
+          Row Limit Per Split: 10
           Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: i1 (type: int)
@@ -849,6 +850,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypes_orc
+          Row Limit Per Split: 10
           Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: i1 (type: int)
@@ -933,6 +935,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypes_orc
+          Row Limit Per Split: 10
           Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: 11.0 (type: double)
diff --git a/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out b/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out
index 5e5a2f6..652ee0a 100644
--- a/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out
+++ b/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out
@@ -14,6 +14,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: src
+            Row Limit Per Split: 1
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: (CAST( UDFToInteger(key) AS decimal(10,0)) / CAST( UDFToInteger(key) AS decimal(10,0))) (type: decimal(21,11))
@@ -61,6 +62,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: src
+            Row Limit Per Split: 1
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: (UDFToDouble(UDFToInteger(key)) / UDFToDouble(UDFToInteger(key))) (type: double)
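The golden-file updates above show the new "Row Limit Per Split" line in explain output whenever a limit was pushed into a TableScan. The TableScanDesc change that renders it is not part of this excerpt, but Hive descriptors normally expose explain fields through @Explain-annotated getters, so the plumbing presumably looks something like this (hypothetical sketch, not the patch's code):

    // Assumed to live in TableScanDesc; @Explain is Hive's real
    // org.apache.hadoop.hive.ql.plan.Explain annotation.
    @Explain(displayName = "Row Limit Per Split")
    public Integer getRowLimitExplain() {
      // Returning null would hide the line when no limit was pushed.
      return rowLimit >= 0 ? rowLimit : null;
    }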
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
index e403ad9..78d0d08 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
@@ -36,6 +36,8 @@
   private static final String READ_COLUMN_IDS_CONF_STR_DEFAULT = "";
   private static final boolean READ_ALL_COLUMNS_DEFAULT = true;
 
+  public static final String SPLIT_ROW_LIMIT = "hive.split.row.limit";
+
   /**
    * @deprecated for backwards compatibility with <= 0.12, use setReadAllColumns
    */
@@ -186,6 +188,14 @@ private static String toReadColumnIDString(List<Integer> ids) {
     return id;
   }
 
+  public static void setRowLimit(Configuration conf, int rowLimit) {
+    conf.setInt(SPLIT_ROW_LIMIT, rowLimit);
+  }
+
+  public static int getRowLimit(Configuration conf) {
+    return conf.getInt(SPLIT_ROW_LIMIT, -1);
+  }
+
   private ColumnProjectionUtils() {
     // prevent instantiation
   }
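On the execution side, nothing in this excerpt consumes hive.split.row.limit yet; a record reader that honors it would look roughly like the following (hypothetical wrapper, only ColumnProjectionUtils.getRowLimit comes from the patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    public class LimitedRowSource {
      private final int rowLimit;
      private int rowsReturned = 0;

      public LimitedRowSource(Configuration conf) {
        this.rowLimit = ColumnProjectionUtils.getRowLimit(conf); // -1 when unset
      }

      /** Whether another row may be handed to the operator pipeline. */
      public boolean mayReturnRow() {
        if (rowLimit >= 0 && rowsReturned >= rowLimit) {
          return false; // per-split limit exhausted; stop reading early
        }
        rowsReturned++;
        return true;
      }
    }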