diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java
index 8cbca69737..f0ee90834e 100644
--- itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java
+++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java
@@ -164,14 +164,23 @@ protected void testGenericUDTFOrderBySplitCount1(String udtfName, int[] expected
     query = "select " + udtfName + "(" + "'select value from " + tableName + " order by under_col limit 0', 5)";
     runQuery(query, getConfigs(), expectedCounts[2]);

+    query = "select " + udtfName + "(" + "'select value from " + tableName + " limit 2', 5)";
+    runQuery(query, getConfigs(), expectedCounts[3]);
+
+    query = "select " + udtfName + "(" + "'select value from " + tableName + " group by value limit 2', 5)";
+    runQuery(query, getConfigs(), expectedCounts[4]);
+
+    query = "select " + udtfName + "(" + "'select value from " + tableName + " where value is not null limit 2', 5)";
+    runQuery(query, getConfigs(), expectedCounts[5]);
+
     query = "select " + udtfName + "(" + "'select `value` from (select value from " + tableName +
       " where value is not null order by value) as t', 5)";
-    runQuery(query, getConfigs(), expectedCounts[3]);
+    runQuery(query, getConfigs(), expectedCounts[6]);

     List<String> setCmds = getConfigs();
     setCmds.add("set hive.llap.external.splits.order.by.force.single.split=false");
     query = "select " + udtfName + "(" + "'select `value` from (select value from " + tableName +
       " where value is not null order by value) as t', 5)";
-    runQuery(query, setCmds, expectedCounts[4]);
+    runQuery(query, setCmds, expectedCounts[7]);
   }
 }
diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
index defbe78802..35cb9131ee 100644
--- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
+++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
@@ -38,7 +38,7 @@

   @Test(timeout = 200000)
   public void testGenericUDTFOrderBySplitCount1() throws Exception {
-    super.testGenericUDTFOrderBySplitCount1("get_splits", new int[]{10, 1, 0, 1, 10});
+    super.testGenericUDTFOrderBySplitCount1("get_splits", new int[]{10, 1, 0, 2, 2, 2, 1, 10});
   }

   @Test
diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java
index 330174513c..c313e516a6 100644
--- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java
+++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java
@@ -25,7 +25,7 @@

   @Test(timeout = 200000)
   public void testGenericUDTFOrderBySplitCount1() throws Exception {
-    super.testGenericUDTFOrderBySplitCount1("get_llap_splits", new int[]{12, 3, 1, 3, 12});
+    super.testGenericUDTFOrderBySplitCount1("get_llap_splits", new int[]{12, 3, 1, 4, 4, 4, 3, 12});
   }

 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 00a6c89b1e..ac467b38cb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -133,6 +133,7 @@
   protected transient IntObjectInspector intOI;
   protected transient JobConf jc;
   private boolean orderByQuery;
+  private boolean limitQuery;
   private boolean forceSingleSplit;
   protected ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
   protected DataOutput dos = new DataOutputStream(bos);
@@ -318,6 +319,7 @@ private PlanFragment createPlanFragment(String query, ApplicationId splitsAppId)
       QueryPlan plan = driver.getPlan();
       orderByQuery = plan.getQueryProperties().hasOrderBy() ||
           plan.getQueryProperties().hasOuterOrderBy();
+      limitQuery = plan.getQueryProperties().getOuterQueryLimit() != -1 ;
       forceSingleSplit = orderByQuery &&
           HiveConf.getBoolVar(conf, ConfVars.LLAP_EXTERNAL_SPLITS_ORDER_BY_FORCE_SINGLE_SPLIT);
       List<Task<?>> roots = plan.getRootTasks();
@@ -335,8 +337,11 @@ private PlanFragment createPlanFragment(String query, ApplicationId splitsAppId)
       tezWork = ((TezTask) roots.get(0)).getWork();
     }

-    if (tezWork == null || tezWork.getAllWork().size() != 1) {
-
+    // A simple limit query (select * from table limit n) generates only mapper work (no reduce phase).
+    // This can create multiple splits on the original table, multiple llap daemons executing those splits
+    // will return "n" rows each and hence, the result may end up containing more rows than "n".
+    // Therefore, a limit query needs to be materialized.
+    if (tezWork == null || tezWork.getAllWork().size() != 1 || limitQuery) {
       String tableName = "table_" + UUID.randomUUID().toString().replaceAll("-", "");
       String storageFormatString = getTempTableStorageFormatString(conf);
