diff --git data/files/max_partition_test_input.txt data/files/max_partition_test_input.txt new file mode 100644 index 0000000..82c9736 --- /dev/null +++ data/files/max_partition_test_input.txt @@ -0,0 +1,12 @@ +1 value1 25 2008-04-09 +2 value2 30 2008-04-09 +3 value3 35 2008-04-09 +4 value4 40 2008-04-09 +5 value5 25 2008-05-09 +6 value6 30 2008-05-09 +7 value7 35 2008-05-09 +8 value8 40 2008-05-09 +9 value9 25 2009-04-09 +10 value10 30 2009-04-09 +11 value11 35 2009-04-09 +12 value12 40 2009-04-09 \ No newline at end of file diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMetaStoreLimitPartitionRequest.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMetaStoreLimitPartitionRequest.java new file mode 100644 index 0000000..236eb8f --- /dev/null +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMetaStoreLimitPartitionRequest.java @@ -0,0 +1,319 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hive.jdbc.miniHS2.MiniHS2; +import org.apache.hive.service.cli.HiveSQLException; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * This unit test is for testing HIVE-13884 with more complex queries and + * hive.metastore.limit.partition.request enabled. + * It covers cases when the query predicates can be pushed down and the + * number of partitions can be retrieved via directSQL. + * It also covers cases when the number of partitions cannot be retrieved + * via directSQL, so it falls back to ORM. + */ +public class TestMetaStoreLimitPartitionRequest { + + private static final String DB_NAME = "max_partition_test_db"; + private static final String TABLE_NAME = "max_partition_test_table"; + private static final String INPUT_FILE_NAME = "max_partition_test_input.txt"; + private static int PARTITION_REQUEST_LIMIT = 4; + private static MiniHS2 miniHS2 = null; + private static HiveConf conf; + private static Path dataFilePath; + private Connection hs2Conn = null; + private Statement stmt; + + @BeforeClass + public static void beforeTest() throws Exception { + Class.forName(MiniHS2.getJdbcDriverName()); + conf = new HiveConf(); + String dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", ""); + dataFilePath = new Path(dataFileDir, INPUT_FILE_NAME); + DriverManager.setLoginTimeout(0); + + conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); + conf.setIntVar(HiveConf.ConfVars.METASTORE_LIMIT_PARTITION_REQUEST, PARTITION_REQUEST_LIMIT); + conf.setBoolVar(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN, true); + conf.setBoolVar(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL, true); + conf.setBoolVar(HiveConf.ConfVars.DYNAMICPARTITIONING, true); + conf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); + conf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, false); + + miniHS2 = new MiniHS2.Builder().withConf(conf).build(); + Map overlayProps = new HashMap(); + miniHS2.start(overlayProps); + createDb(); + } + + private static void createDb() throws Exception { + Connection conn = + DriverManager.getConnection(miniHS2.getJdbcURL(), System.getProperty("user.name"), "bar"); + Statement stmt2 = conn.createStatement(); + stmt2.execute("DROP DATABASE IF EXISTS " + DB_NAME + " CASCADE"); + stmt2.execute("CREATE DATABASE " + DB_NAME); + stmt2.close(); + conn.close(); + } + + @Before + public void setUp() throws Exception { + hs2Conn = DriverManager.getConnection(miniHS2.getJdbcURL(DB_NAME), + System.getProperty("user.name"), "bar"); + stmt = hs2Conn.createStatement(); + stmt.execute("USE " + DB_NAME); + + String tmpTableName = TABLE_NAME + "_tmp"; + stmt.execute("CREATE TABLE " + tmpTableName + + " (id string, value string, num string, ds date) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' STORED AS TEXTFILE"); + stmt.execute("LOAD DATA LOCAL INPATH '" + dataFilePath.toString() + "' INTO TABLE " + tmpTableName); + stmt.execute("CREATE TABLE " + TABLE_NAME + " (id string, value string) PARTITIONED BY (num string, ds date)"); + stmt.execute("INSERT OVERWRITE TABLE " + TABLE_NAME + " PARTITION (num, ds) SELECT id, value, num, ds FROM " + tmpTableName); + } + + @After + public void tearDown() throws Exception { + String tmpTableName = TABLE_NAME + "_tmp"; + stmt.execute("DROP TABLE IF EXISTS " + TABLE_NAME); + stmt.execute("DROP TABLE IF EXISTS " + tmpTableName); + stmt.execute("DROP TABLE IF EXISTS " + TABLE_NAME + "_num_tmp"); + + if (hs2Conn != null) { + hs2Conn.close(); + } + } + + @AfterClass + public static void afterTest() throws Exception { + if (miniHS2 != null && miniHS2.isStarted()) { + miniHS2.stop(); + } + } + + /* Tests with queries which can be pushed down and executed with directSQL */ + + @Test + public void testSimpleQueryWithDirectSql() throws Exception { + String queryString = "select value from %s where num='25' and ds='2008-04-09'"; + executeQuery(queryString, 1); + } + + @Test + public void testMoreComplexQueryWithDirectSql() throws Exception { + String queryString = "select value from %s where (ds between '2009-01-01' and '2009-12-31' and num='25') or (ds between '2008-01-01' and '2008-12-31' and num='30')"; + executeQuery(queryString, 3); + } + + /* + * Tests with queries which can be pushed down and executed with directSQL, but the number of + * partitions which should be fetched is bigger than the maximum set by the + * hive.metastore.limit.partition.request parameter. + */ + + @Test + public void testSimpleQueryWithDirectSqlTooManyPartitions() throws Exception { + String queryString = "select value from %s where ds>'2008-04-20'"; + executeQueryExceedPartitionLimit(queryString, 8); + } + + @Test + public void testMoreComplexQueryWithDirectSqlTooManyPartitions() throws Exception { + String queryString = "select value from %s where num='25' or (num='30' and ds between '2008-01-01' and '2008-12-31')"; + executeQueryExceedPartitionLimit(queryString, 5); + } + + /* + * Tests with queries which cannot be executed with directSQL, because of type mismatch. The type + * of the num column is string, but the parameters used in the where clause are numbers. After + * falling back to ORM, the number of partitions can be fetched by the + * ObjectStore.getNumPartitionsViaOrmFilter method. + */ + + @Test + public void testQueryWithFallbackToORM1() throws Exception { + String queryString = "select value from %s where num!=25 and num!=35 and num!=40"; + executeQuery(queryString, 3); + } + + @Test + public void testQueryWithFallbackToORMTooManyPartitions1() throws Exception { + String queryString = "select value from %s where num=30 or num=25"; + executeQueryExceedPartitionLimit(queryString, 6); + } + + /* + * Tests with queries which cannot be executed with directSQL, because of type mismatch. The type + * of the num column is string, but the parameters used in the where clause are numbers. After + * falling back to ORM the number of partitions cannot be fetched by the + * ObjectStore.getNumPartitionsViaOrmFilter method. They are fetched by the + * ObjectStore.getPartitionNamesPrunedByExprNoTxn method. + */ + + @Test + public void testQueryWithFallbackToORM2() throws Exception { + String queryString = "select value from %s where num!=25 and ds='2008-04-09'"; + executeQuery(queryString, 3); + } + + @Test + public void testQueryWithFallbackToORM3() throws Exception { + String queryString = "select value from %s where num between 26 and 31"; + executeQuery(queryString, 3); + } + + @Test + public void testQueryWithFallbackToORMTooManyPartitions2() throws Exception { + String queryString = "select value from %s where num!=25 and (ds='2008-04-09' or ds='2008-05-09')"; + executeQueryExceedPartitionLimit(queryString, 6); + } + + @Test + public void testQueryWithFallbackToORMTooManyPartitions3() throws Exception { + String queryString = "select value from %s where num>=30"; + executeQueryExceedPartitionLimit(queryString, 9); + } + + @Test + public void testQueryWithFallbackToORMTooManyPartitions4() throws Exception { + String queryString = "select value from %s where num between 20 and 50"; + executeQueryExceedPartitionLimit(queryString, 12); + } + + /* + * Tests with queries which cannot be executed with directSQL, because the contain like or in. + * After falling back to ORM the number of partitions cannot be fetched by the + * ObjectStore.getNumPartitionsViaOrmFilter method. They are fetched by the + * ObjectStore.getPartitionNamesPrunedByExprNoTxn method. + */ + + @Test + public void testQueryWithInWithFallbackToORM() throws Exception { + setupNumTmpTable(); + String queryString = "select value from %s a where ds='2008-04-09' and a.num in (select value from " + TABLE_NAME + "_num_tmp)"; + executeQuery(queryString, 2); + } + + @Test + public void testQueryWithInWithFallbackToORMTooManyPartitions() throws Exception { + setupNumTmpTable(); + String queryString = "select value from %s a where a.num in (select value from " + TABLE_NAME + "_num_tmp)"; + executeQueryExceedPartitionLimit(queryString, 12); + } + + @Test + public void testQueryWithInWithFallbackToORMTooManyPartitions2() throws Exception { + setupNumTmpTable(); + String queryString = "select value from %s a where a.num in (select value from " + TABLE_NAME + "_num_tmp where value='25')"; + executeQueryExceedPartitionLimit(queryString, 12); + } + + @Test + public void testQueryWithLikeWithFallbackToORMTooManyPartitions() throws Exception { + String queryString = "select value from %s where num like '3%%'"; + executeQueryExceedPartitionLimit(queryString, 6); + } + + private void setupNumTmpTable() throws SQLException { + stmt.execute("CREATE TABLE " + TABLE_NAME + "_num_tmp (value string)"); + stmt.execute("INSERT INTO " + TABLE_NAME + "_num_tmp VALUES ('25')"); + stmt.execute("INSERT INTO " + TABLE_NAME + "_num_tmp VALUES ('30')"); + } + + private void executeQuery(String query, int expectedResultSize) throws SQLException { + String queryStr = String.format(query, TABLE_NAME); + ResultSet result = stmt.executeQuery(queryStr); + assertTrue(result != null); + int resultSize = getResultSize(result); + assertTrue(getWrongResultSizeErrorMsg(queryStr, resultSize, expectedResultSize), + resultSize == expectedResultSize); + } + + private int getResultSize(ResultSet result) throws SQLException { + int size = 0; + while(result.next()) { + size++; + } + return size; + } + + private void executeQueryExceedPartitionLimit(String query, int expectedPartitionNumber) throws Exception { + try { + String queryStr = String.format(query, TABLE_NAME); + stmt.executeQuery(queryStr); + fail("The query should have failed, because the number of requested partitions are bigger than " + + PARTITION_REQUEST_LIMIT); + } catch (HiveSQLException e) { + String exceedLimitMsg = getExceedLimitMessage(expectedPartitionNumber); + assertTrue(getWrongExceptionMessage(exceedLimitMsg, e.getMessage()), + e.getMessage().contains(exceedLimitMsg.toString())); + } + } + + private String getExceedLimitMessage(int expectedPartitionNumber) { + StringBuilder exceedLimitMsg = new StringBuilder(); + exceedLimitMsg.append("Number of partitions scanned (="); + exceedLimitMsg.append(expectedPartitionNumber); + exceedLimitMsg.append(") on table '"); + exceedLimitMsg.append(TABLE_NAME); + exceedLimitMsg.append("' exceeds limit (="); + exceedLimitMsg.append(PARTITION_REQUEST_LIMIT); + exceedLimitMsg.append(")"); + return exceedLimitMsg.toString(); + } + + private String getWrongResultSizeErrorMsg(String query, int resultSize, int expectedResultSize) { + StringBuilder errorMsg = new StringBuilder(); + errorMsg.append("The query '"); + errorMsg.append(query); + errorMsg.append("' returned "); + errorMsg.append(resultSize); + errorMsg.append(" rows instead of the expected "); + errorMsg.append(expectedResultSize); + return errorMsg.toString(); + } + + private String getWrongExceptionMessage(String exceedLimitMsg, String exceptionMessage) { + StringBuilder errorMsg = new StringBuilder(); + errorMsg.append("The message of the exception doesn't contain the expected '"); + errorMsg.append(exceedLimitMsg.toString()); + errorMsg.append("'. It is: "); + errorMsg.append(exceptionMessage); + return errorMsg.toString(); + } + +}