diff --git a/contrib/src/test/results/clientpositive/dboutput.q.out b/contrib/src/test/results/clientpositive/dboutput.q.out index 476e306..4d5e361 100644 --- a/contrib/src/test/results/clientpositive/dboutput.q.out +++ b/contrib/src/test/results/clientpositive/dboutput.q.out @@ -56,6 +56,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dboutput('jdbc:derby:../build/test_dboutput_db;create=true','','','CREATE TABLE app_info ( kkey VARCHAR(255) NOT NULL, vvalue VARCHAR(255) NOT NULL, UNIQUE(kkey))') (type: int), dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)','20','a') (type: int), dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)','20','b') (type: int) diff --git a/contrib/src/test/results/clientpositive/udf_example_add.q.out b/contrib/src/test/results/clientpositive/udf_example_add.q.out index 7916679..7734a67 100644 --- a/contrib/src/test/results/clientpositive/udf_example_add.q.out +++ b/contrib/src/test/results/clientpositive/udf_example_add.q.out @@ -34,6 +34,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double) diff --git a/contrib/src/test/results/clientpositive/udf_example_format.q.out b/contrib/src/test/results/clientpositive/udf_example_format.q.out index 34b10c4..03f4bcd 100644 --- a/contrib/src/test/results/clientpositive/udf_example_format.q.out +++ b/contrib/src/test/results/clientpositive/udf_example_format.q.out @@ -28,6 +28,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'abc' (type: string), '1.1' (type: string), '1.1 1.200000e+00' (type: string), 'a 12 10' (type: string) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 0f7da53..e0bec33 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -695,4 +695,8 @@ public void setAcidOperation(AcidUtils.Operation op) { public AcidUtils.Operation getAcidOperation() { return acidOperation; } + + public boolean isFirstOrLast() { + return tryCount == 0 || tryCount == Integer.MAX_VALUE; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 9d5730d..e56d3cc 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -442,6 +442,11 @@ public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) { return; } + int rowLimit = scanDesc.getRowLimit(); + if (rowLimit > 0) { + ColumnProjectionUtils.setRowLimit(jobConf, rowLimit); + } + // construct column name list and types for reference by filter push down Utilities.setColumnNameList(jobConf, tableScan); Utilities.setColumnTypeList(jobConf, tableScan); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java index 41bb84c..c40ccf1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; @@ -76,7 +77,7 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { // The query only qualifies when there are only one top operator // and there is no transformer or UDTF and no block sampling // is used. - if (ctx.getTryCount() == 0 && topOps.size() == 1 + if (ctx.isFirstOrLast() && topOps.size() == 1 && !globalLimitCtx.ifHasTransformOrUDTF() && nameToSplitSample.isEmpty()) { @@ -102,7 +103,10 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { if (!tab.isPartitioned()) { if (filterOps.size() == 0) { - globalLimitCtx.enableOpt(tempGlobalLimit); + if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVELIMITOPTENABLE)) { + globalLimitCtx.enableOpt(tempGlobalLimit); + } + ts.getConf().setRowLimit(tempGlobalLimit); } } else { // check if the pruner only contains partition columns @@ -114,7 +118,10 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { // If there is any unknown partition, create a map-reduce job for // the filter to prune correctly if (!partsList.hasUnknownPartitions()) { - globalLimitCtx.enableOpt(tempGlobalLimit); + if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVELIMITOPTENABLE)) { + globalLimitCtx.enableOpt(tempGlobalLimit); + } + ts.getConf().setRowLimit(tempGlobalLimit); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 3482a47..49accd9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -142,9 +142,7 @@ public void initialize(HiveConf hiveConf) { if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)) { transformations.add(new IdentityProjectRemover()); } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) { - transformations.add(new GlobalLimitOptimizer()); - } + transformations.add(new GlobalLimitOptimizer()); if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCORRELATION) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME) && diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 54ab25f..3bf72e6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -585,6 +585,10 @@ public tableSpec getTableSpec() { return destToAggregationExprs; } + public LinkedHashMap> getDestToWindowingExprs() { + return destToWindowingExprs; + } + public HashMap> getDestToDistinctFuncExprs() { return destToDistinctFuncExprs; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 0e85990..0113348 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -236,8 +236,10 @@ public void setMaxStatsKeyPrefixLength(int maxStatsKeyPrefixLength) { this.maxStatsKeyPrefixLength = maxStatsKeyPrefixLength; } - public void setRowLimit(int rowLimit) { - this.rowLimit = rowLimit; + public void setRowLimit(int newLimit) { + if (rowLimit < 0 || newLimit < rowLimit) { + rowLimit = newLimit; + } } public int getRowLimit() { diff --git a/ql/src/test/results/clientnegative/limit_partition.q.out b/ql/src/test/results/clientnegative/limit_partition.q.out index e434deb..e4b05a3 100644 --- a/ql/src/test/results/clientnegative/limit_partition.q.out +++ b/ql/src/test/results/clientnegative/limit_partition.q.out @@ -12,6 +12,7 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Row Limit Per Split: 1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) diff --git a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out index 5f8b6f8..2dcb9a3 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -99,6 +99,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc + Row Limit Per Split: 4 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) @@ -127,6 +128,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc + Row Limit Per Split: 16 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out index a4c8c41..1caf28c 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -821,6 +821,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Row Limit Per Split: 10 Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i1 (type: int) @@ -849,6 +850,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Row Limit Per Split: 10 Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i1 (type: int) @@ -933,6 +935,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Row Limit Per Split: 10 Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 11.0 (type: double) diff --git a/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out b/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out index 5e5a2f6..652ee0a 100644 --- a/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out +++ b/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out @@ -14,6 +14,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (CAST( UDFToInteger(key) AS decimal(10,0)) / CAST( UDFToInteger(key) AS decimal(10,0))) (type: decimal(21,11)) @@ -61,6 +62,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(UDFToInteger(key)) / UDFToDouble(UDFToInteger(key))) (type: double) diff --git a/ql/src/test/results/clientpositive/decimal_udf.q.out b/ql/src/test/results/clientpositive/decimal_udf.q.out index fbf4208..f266ef8 100644 --- a/ql/src/test/results/clientpositive/decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/decimal_udf.q.out @@ -905,6 +905,7 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Row Limit Per Split: 1 Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / CAST( 0 AS decimal(10,0))) (type: decimal(22,12)) @@ -938,6 +939,7 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Row Limit Per Split: 1 Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / UDFToDouble(null)) (type: double) diff --git a/ql/src/test/results/clientpositive/input_limit.q.out b/ql/src/test/results/clientpositive/input_limit.q.out index d1d62ec..c7ddf78 100644 --- a/ql/src/test/results/clientpositive/input_limit.q.out +++ b/ql/src/test/results/clientpositive/input_limit.q.out @@ -14,6 +14,7 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Row Limit Per Split: 20 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/input_part8.q.out b/ql/src/test/results/clientpositive/input_part8.q.out index 0fef273..a741d98 100644 --- a/ql/src/test/results/clientpositive/input_part8.q.out +++ b/ql/src/test/results/clientpositive/input_part8.q.out @@ -14,6 +14,7 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Row Limit Per Split: 10 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) diff --git a/ql/src/test/results/clientpositive/literal_decimal.q.out b/ql/src/test/results/clientpositive/literal_decimal.q.out index 7ef4b38..66ac5fd 100644 --- a/ql/src/test/results/clientpositive/literal_decimal.q.out +++ b/ql/src/test/results/clientpositive/literal_decimal.q.out @@ -12,6 +12,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (- 1) (type: decimal(1,0)), 0 (type: decimal(1,0)), 1 (type: decimal(1,0)), 3.14 (type: decimal(3,2)), (- 3.14) (type: decimal(3,2)), 99999999999999999 (type: decimal(17,0)), 99999999999999999.9999999999999 (type: decimal(30,13)), 1E99 (type: decimal(1,0)) diff --git a/ql/src/test/results/clientpositive/literal_double.q.out b/ql/src/test/results/clientpositive/literal_double.q.out index 5d46d2d..0754daf 100644 --- a/ql/src/test/results/clientpositive/literal_double.q.out +++ b/ql/src/test/results/clientpositive/literal_double.q.out @@ -12,6 +12,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 3.14 (type: double), -3.14 (type: double), 3.14E8 (type: double), 3.14E-8 (type: double), -3.14E8 (type: double), -3.14E-8 (type: double), 3.14E8 (type: double), 3.14E8 (type: double), 3.14E-8 (type: double) diff --git a/ql/src/test/results/clientpositive/literal_ints.q.out b/ql/src/test/results/clientpositive/literal_ints.q.out index 9a56ebd..4260062 100644 --- a/ql/src/test/results/clientpositive/literal_ints.q.out +++ b/ql/src/test/results/clientpositive/literal_ints.q.out @@ -12,6 +12,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 100 (type: int), 100 (type: tinyint), 100 (type: smallint), 100 (type: bigint) diff --git a/ql/src/test/results/clientpositive/literal_string.q.out b/ql/src/test/results/clientpositive/literal_string.q.out index b2f5e12..c9537a3 100644 --- a/ql/src/test/results/clientpositive/literal_string.q.out +++ b/ql/src/test/results/clientpositive/literal_string.q.out @@ -22,6 +22,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string) diff --git a/ql/src/test/results/clientpositive/macro.q.out b/ql/src/test/results/clientpositive/macro.q.out index 76ea250..a652456 100644 --- a/ql/src/test/results/clientpositive/macro.q.out +++ b/ql/src/test/results/clientpositive/macro.q.out @@ -27,6 +27,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 0.8807970779778823 (type: double) @@ -71,6 +72,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator @@ -117,6 +119,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 2 (type: int) @@ -162,6 +165,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator @@ -235,6 +239,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 10 (type: int) @@ -280,6 +285,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out index a4ce905..7d95dd9 100644 --- a/ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -14,6 +14,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -56,6 +57,7 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) @@ -100,6 +102,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -255,6 +258,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -297,6 +301,7 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) @@ -343,6 +348,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) @@ -434,6 +440,7 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out b/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index 6bfc624..554a5df 100644 --- a/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ b/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -12,6 +12,7 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) @@ -36,6 +37,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) @@ -60,6 +62,7 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) @@ -84,6 +87,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) @@ -133,6 +137,7 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) @@ -157,6 +162,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 10 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) diff --git a/ql/src/test/results/clientpositive/num_op_type_conv.q.out b/ql/src/test/results/clientpositive/num_op_type_conv.q.out index 708fb51..145476c 100644 --- a/ql/src/test/results/clientpositive/num_op_type_conv.q.out +++ b/ql/src/test/results/clientpositive/num_op_type_conv.q.out @@ -18,6 +18,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(null) + 7.0) (type: double), (1.0 - UDFToDouble(null)) (type: double), (UDFToDouble(null) + UDFToDouble(null)) (type: double), 1 (type: bigint), 0 (type: bigint), 0.0 (type: double) diff --git a/ql/src/test/results/clientpositive/quote2.q.out b/ql/src/test/results/clientpositive/quote2.q.out index 086960a..a46f1a9 100644 --- a/ql/src/test/results/clientpositive/quote2.q.out +++ b/ql/src/test/results/clientpositive/quote2.q.out @@ -40,6 +40,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'abc' (type: string), 'abc' (type: string), 'abc'' (type: string), 'abc"' (type: string), 'abc\' (type: string), 'abc\' (type: string), 'abc\'' (type: string), 'abc\"' (type: string), 'abc\\' (type: string), 'abc\\' (type: string), 'abc\\'' (type: string), 'abc\\"' (type: string), 'abc\\\' (type: string), 'abc\\\' (type: string), 'abc""""\' (type: string), 'abc''''\' (type: string), 'mysql_%\_\%' (type: string), 'mysql\\_\\\%' (type: string), 'awk '{print NR"\t"$0}'' (type: string), 'tab tab' (type: string), 'tab tab' (type: string) diff --git a/ql/src/test/results/clientpositive/spark/temp_table.q.out b/ql/src/test/results/clientpositive/spark/temp_table.q.out index 16d663d..04bc7c7 100644 --- a/ql/src/test/results/clientpositive/spark/temp_table.q.out +++ b/ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -160,6 +160,7 @@ STAGE PLANS: Processor Tree: TableScan alias: foo + Row Limit Per Split: 10 Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/udf_example_add.q.out b/ql/src/test/results/clientpositive/spark/udf_example_add.q.out index 7916679..7734a67 100644 --- a/ql/src/test/results/clientpositive/spark/udf_example_add.q.out +++ b/ql/src/test/results/clientpositive/spark/udf_example_add.q.out @@ -34,6 +34,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double) diff --git a/ql/src/test/results/clientpositive/spark/vector_elt.q.out b/ql/src/test/results/clientpositive/spark/vector_elt.q.out index 180ea15..473258a 100644 --- a/ql/src/test/results/clientpositive/spark/vector_elt.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_elt.q.out @@ -87,6 +87,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypesorc + Row Limit Per Split: 1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: void), null (type: void) diff --git a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 9ec8538..61aff34 100644 --- a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -117,6 +117,7 @@ STAGE PLANS: Processor Tree: TableScan alias: over1korc + Row Limit Per Split: 20 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) diff --git a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out index 30d116f..71918fd 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out @@ -16,6 +16,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypesorc + Row Limit Per Split: 100 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble / 0.0) (type: double) diff --git a/ql/src/test/results/clientpositive/str_to_map.q.java1.7.out b/ql/src/test/results/clientpositive/str_to_map.q.java1.7.out index c296c37..62b4bb5 100644 --- a/ql/src/test/results/clientpositive/str_to_map.q.java1.7.out +++ b/ql/src/test/results/clientpositive/str_to_map.q.java1.7.out @@ -27,6 +27,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str_to_map('a=1,b=2,c=3',',','=')['a'] (type: string) @@ -62,6 +63,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str_to_map('a:1,b:2,c:3') (type: map) @@ -97,6 +99,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map) diff --git a/ql/src/test/results/clientpositive/temp_table.q.out b/ql/src/test/results/clientpositive/temp_table.q.out index cad71fd..32dedfb 100644 --- a/ql/src/test/results/clientpositive/temp_table.q.out +++ b/ql/src/test/results/clientpositive/temp_table.q.out @@ -242,6 +242,7 @@ STAGE PLANS: Processor Tree: TableScan alias: foo + Row Limit Per Split: 10 Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/tez/temp_table.q.out b/ql/src/test/results/clientpositive/tez/temp_table.q.out index 1bf4085..701c83a 100644 --- a/ql/src/test/results/clientpositive/tez/temp_table.q.out +++ b/ql/src/test/results/clientpositive/tez/temp_table.q.out @@ -168,6 +168,7 @@ STAGE PLANS: Processor Tree: TableScan alias: foo + Row Limit Per Split: 10 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out index 8e7371a..c7daca1 100644 --- a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out @@ -1140,6 +1140,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf + Row Limit Per Split: 1 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / CAST( 0 AS decimal(10,0))) (type: decimal(22,12)) @@ -1189,6 +1190,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf + Row Limit Per Split: 1 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / UDFToDouble(null)) (type: double) diff --git a/ql/src/test/results/clientpositive/tez/vector_elt.q.out b/ql/src/test/results/clientpositive/tez/vector_elt.q.out index b27798a..e86a8a6 100644 --- a/ql/src/test/results/clientpositive/tez/vector_elt.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_elt.q.out @@ -83,6 +83,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypesorc + Row Limit Per Split: 1 Select Operator expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: void), null (type: void) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 diff --git a/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out b/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out index f6eb9c5..f2f74e8 100644 --- a/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out @@ -117,6 +117,7 @@ STAGE PLANS: Processor Tree: TableScan alias: over1korc + Row Limit Per Split: 20 Select Operator expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) outputColumnNames: _col0, _col1, _col2 diff --git a/ql/src/test/results/clientpositive/tez/vectorization_div0.q.out b/ql/src/test/results/clientpositive/tez/vectorization_div0.q.out index 49dc346..002fd49 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_div0.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_div0.q.out @@ -16,6 +16,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypesorc + Row Limit Per Split: 100 Select Operator expressions: (cdouble / 0.0) (type: double) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out index be782e4..1200cd5 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out @@ -861,6 +861,7 @@ STAGE PLANS: Processor Tree: TableScan alias: date_udf_flight_orc + Row Limit Per Split: 10 Select Operator expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: string), to_date(date_sub(fl_date, 2)) (type: string), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 diff --git a/ql/src/test/results/clientpositive/type_cast_1.q.out b/ql/src/test/results/clientpositive/type_cast_1.q.out index 99aff5b..bae6172 100644 --- a/ql/src/test/results/clientpositive/type_cast_1.q.out +++ b/ql/src/test/results/clientpositive/type_cast_1.q.out @@ -14,6 +14,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 5 (type: int) diff --git a/ql/src/test/results/clientpositive/type_widening.q.out b/ql/src/test/results/clientpositive/type_widening.q.out index e6617d4..5e7feaf 100644 --- a/ql/src/test/results/clientpositive/type_widening.q.out +++ b/ql/src/test/results/clientpositive/type_widening.q.out @@ -14,6 +14,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 0 (type: bigint) diff --git a/ql/src/test/results/clientpositive/udf_between.q.out b/ql/src/test/results/clientpositive/udf_between.q.out index efe6615..cddba55 100644 --- a/ql/src/test/results/clientpositive/udf_between.q.out +++ b/ql/src/test/results/clientpositive/udf_between.q.out @@ -132,6 +132,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -165,6 +166,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/udf_example_add.q.out b/ql/src/test/results/clientpositive/udf_example_add.q.out index 7916679..7734a67 100644 --- a/ql/src/test/results/clientpositive/udf_example_add.q.out +++ b/ql/src/test/results/clientpositive/udf_example_add.q.out @@ -34,6 +34,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double) diff --git a/ql/src/test/results/clientpositive/udf_in_file.q.out b/ql/src/test/results/clientpositive/udf_in_file.q.out index 5564890..3ad3d2f 100644 --- a/ql/src/test/results/clientpositive/udf_in_file.q.out +++ b/ql/src/test/results/clientpositive/udf_in_file.q.out @@ -57,6 +57,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: value_src + Row Limit Per Split: 1 Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: in_file(str_val, '../../data/files/test2.dat') (type: boolean), in_file(ch_val, '../../data/files/test2.dat') (type: boolean), in_file(vch_val, '../../data/files/test2.dat') (type: boolean), in_file(str_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(ch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(vch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean) diff --git a/ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out b/ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out index a7d45ea..1637962 100644 --- a/ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out +++ b/ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out @@ -42,6 +42,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: null is null (type: boolean), true (type: boolean), true (type: boolean) diff --git a/ql/src/test/results/clientpositive/udf_reflect2.q.out b/ql/src/test/results/clientpositive/udf_reflect2.q.out index b9266df..d6ee97d 100644 --- a/ql/src/test/results/clientpositive/udf_reflect2.q.out +++ b/ql/src/test/results/clientpositive/udf_reflect2.q.out @@ -317,6 +317,7 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Row Limit Per Split: 5 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out index 102fd07..243b0b0 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -1098,6 +1098,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf + Row Limit Per Split: 1 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / CAST( 0 AS decimal(10,0))) (type: decimal(22,12)) @@ -1144,6 +1145,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf + Row Limit Per Split: 1 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / UDFToDouble(null)) (type: double) diff --git a/ql/src/test/results/clientpositive/vector_elt.q.out b/ql/src/test/results/clientpositive/vector_elt.q.out index 3a2c1fc..4a6c7f8 100644 --- a/ql/src/test/results/clientpositive/vector_elt.q.out +++ b/ql/src/test/results/clientpositive/vector_elt.q.out @@ -100,6 +100,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypesorc + Row Limit Per Split: 1 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: void), null (type: void) diff --git a/ql/src/test/results/clientpositive/vector_string_concat.q.out b/ql/src/test/results/clientpositive/vector_string_concat.q.out index 072d837..99f98a4 100644 --- a/ql/src/test/results/clientpositive/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -117,6 +117,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc + Row Limit Per Split: 20 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), concat(concat(' ', s), ' ') (type: string), concat(concat('|', rtrim(concat(concat(' ', s), ' '))), '|') (type: string) diff --git a/ql/src/test/results/clientpositive/vectorization_div0.q.out b/ql/src/test/results/clientpositive/vectorization_div0.q.out index 9cd35d3..f6b1b9a 100644 --- a/ql/src/test/results/clientpositive/vectorization_div0.q.out +++ b/ql/src/test/results/clientpositive/vectorization_div0.q.out @@ -16,6 +16,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + Row Limit Per Split: 100 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble / 0.0) (type: double) diff --git a/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index 8cde444..ba9cb9f 100644 --- a/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ b/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -906,6 +906,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_udf_flight_orc + Row Limit Per Split: 10 Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: string), to_date(date_sub(fl_date, 2)) (type: string), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java index e403ad9..78d0d08 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java @@ -36,6 +36,8 @@ private static final String READ_COLUMN_IDS_CONF_STR_DEFAULT = ""; private static final boolean READ_ALL_COLUMNS_DEFAULT = true; + public static final String SPLIT_ROW_LIMIT = "hive.split.row.limit"; + /** * @deprecated for backwards compatibility with <= 0.12, use setReadAllColumns */ @@ -186,6 +188,14 @@ private static String toReadColumnIDString(List ids) { return id; } + public static void setRowLimit(Configuration conf, int rowLimit) { + conf.setInt(SPLIT_ROW_LIMIT, rowLimit); + } + + public static int getRowLimit(Configuration conf) { + return conf.getInt(SPLIT_ROW_LIMIT, -1); + } + private ColumnProjectionUtils() { // prevent instantiation }