commit c411ccafac06eef7ab0156cd8e67d2fed999273c Author: Akshay Goyal akshaygoyal2006@gmail.com Date: Wed Dec 16 17:43:58 2015 +0530 HIVE-11487: getNumPartitionsByFilter api in metastore api diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift index bb754f1..c291f19 100755 --- a/metastore/if/hive_metastore.thrift +++ b/metastore/if/hive_metastore.thrift @@ -1053,6 +1053,10 @@ service ThriftHiveMetastore extends fb303.FacebookService PartitionsByExprResult get_partitions_by_expr(1:PartitionsByExprRequest req) throws(1:MetaException o1, 2:NoSuchObjectException o2) + // get the number of partitions matching the given partition filter + i32 get_num_partitions_by_filter(1:string db_name 2:string tbl_name 3:string filter) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + // get partitions give a list of partition names list get_partitions_by_names(1:string db_name 2:string tbl_name 3:list names) throws(1:MetaException o1, 2:NoSuchObjectException o2) diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 0940fd7..ade8d06 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -4542,6 +4542,25 @@ private void rethrowException(Exception e) } @Override + public int get_num_partitions_by_filter(final String dbName, + final String tblName, final String filter) + throws MetaException, NoSuchObjectException, TException { + startTableFunction("get_num_partitions_by_filter", dbName, tblName); + + int ret = -1; + Exception ex = null; + try { + ret = getMS().getNumPartitionsByFilter(dbName, tblName, filter); + } catch (Exception e) { + ex = e; + rethrowException(e); + } finally { + endFunction("get_num_partitions_by_filter", ret != -1, ex, tblName); + } + return ret; + } + + @Override public List get_partitions_by_names(final String dbName, 
final String tblName, final List partNames) throws MetaException, NoSuchObjectException, TException { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index c5e7a5f..e127c8a 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -1199,7 +1199,6 @@ public boolean listPartitionsByExpr(String db_name, String tbl_name, byte[] expr return !r.isSetHasUnknownPartitions() || r.isHasUnknownPartitions(); // Assume the worst. } - /** * @param name * @return the database @@ -1390,6 +1389,24 @@ public boolean tableExists(String tableName) throws MetaException, client.get_partition_names_ps(db_name, tbl_name, part_vals, max_parts)); } + /** + * Get number of partitions matching specified filter + * @param db_name the database name + * @param tbl_name the table name + * @param filter the filter string, + * for example "part1 = \"p1_abc\" and part2 <= \"p2_test\"". Filtering can + * be done only on string partition keys. 
+ * @return number of partitions + * @throws MetaException + * @throws NoSuchObjectException + * @throws TException + */ + public int getNumPartitionsByFilter(String db_name, String tbl_name, + String filter) throws MetaException, + NoSuchObjectException, TException { + return client.get_num_partitions_by_filter(db_name, tbl_name, filter); + } + @Override public void alter_partition(String dbName, String tblName, Partition newPart) throws InvalidOperationException, MetaException, TException { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index aa96f77..57a5c7f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -530,6 +530,22 @@ public PartitionSpecProxy listPartitionSpecs(String dbName, String tableName, in throws MetaException, TException, NoSuchObjectException; /** + * Get number of partitions matching specified filter + * @param dbName the database name + * @param tableName the table name + * @param filter the filter string, + * for example "part1 = \"p1_abc\" and part2 <= \"p2_test\"". Filtering can + * be done only on string partition keys. 
+ * @return number of partitions + * @throws MetaException + * @throws NoSuchObjectException + * @throws TException + */ + public int getNumPartitionsByFilter(String dbName, String tableName, + String filter) throws MetaException, NoSuchObjectException, TException; + + + /** * Get list of partitions matching specified filter * @param db_name the database name * @param tbl_name the table name diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 36b316a..be54b9c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -385,6 +385,20 @@ public Database getDatabase(String dbName) throws MetaException{ isViewTable, sqlFilter, params, joins, max); } + public int getNumPartitionsViaSqlFilter(Table table, ExpressionTree tree) throws MetaException { + List params = new ArrayList(); + Listjoins = new ArrayList(); + // Derby and Oracle do not interpret filters ANSI-properly in some cases and need a workaround. + boolean dbHasJoinCastBug = (dbType == DB.DERBY || dbType == DB.ORACLE); + String sqlFilter = PartitionFilterGenerator.generateSqlFilter( + table, tree, params, joins, dbHasJoinCastBug, defaultPartName); + if (sqlFilter == null) { + return 0; // Cannot make SQL filter to push down. + } + return getNumPartitionsViaSqlFilterInternal(table.getDbName(), table.getTableName(), sqlFilter, params, joins); + } + + /** * Gets all partitions of a table by using direct SQL queries. * @param dbName Metastore db name. 
@@ -807,6 +821,39 @@ public void apply(SerDeInfo t, Object[] fields) { return orderedResult; } + private int getNumPartitionsViaSqlFilterInternal(String dbName, String tblName, + String sqlFilter, List paramsForFilter, + List joinsForFilter) throws MetaException { + boolean doTrace = LOG.isDebugEnabled(); + dbName = dbName.toLowerCase(); + tblName = tblName.toLowerCase(); + + // Get number of partitions by doing count on PART_ID. + String queryText = "select count(\"PARTITIONS\".\"PART_ID\") from \"PARTITIONS\"" + + " inner join \"TBLS\" on \"PARTITIONS\".\"TBL_ID\" = \"TBLS\".\"TBL_ID\" " + + " and \"TBLS\".\"TBL_NAME\" = ? " + + " inner join \"DBS\" on \"TBLS\".\"DB_ID\" = \"DBS\".\"DB_ID\" " + + " and \"DBS\".\"NAME\" = ? " + + join(joinsForFilter, ' ') + + (sqlFilter == null ? "" : (" where " + sqlFilter)); + + Object[] params = new Object[paramsForFilter.size() + 2]; + params[0] = tblName; + params[1] = dbName; + for (int i = 0; i < paramsForFilter.size(); ++i) { + params[i + 2] = paramsForFilter.get(i); + } + + long start = doTrace ? System.nanoTime() : 0; + Query query = pm.newQuery("javax.jdo.query.SQL", queryText); + @SuppressWarnings("unchecked") + int sqlResult = extractSqlInt(query.executeWithArray(params)); + long queryTime = doTrace ? 
System.nanoTime() : 0; + timingTrace(doTrace, queryText, start, queryTime); + return sqlResult; + } + + private void timingTrace(boolean doTrace, String queryText, long start, long queryTime) { if (!doTrace) return; LOG.debug("Direct SQL query in " + (queryTime - start) / 1000000.0 + "ms + " + diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index abfe2b8..1963967 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -2279,6 +2279,7 @@ protected boolean getPartitionsByExprInternal(String dbName, String tblName, fin } return result; } + @Override protected List getJdoResult( GetHelper> ctx) throws MetaException, NoSuchObjectException { @@ -2365,6 +2366,12 @@ private boolean getPartitionNamesPrunedByExprNoTxn(Table table, byte[] expr, return results; } + + private Integer getNumPartitionsViaOrmFilter(Table table, ExpressionTree tree, boolean isValidatedFilter) + throws MetaException { + return getPartitionsViaOrmFilter(table, tree, (short) -1, isValidatedFilter).size(); + } + /** * Gets partition names from the table via ORM (JDOQL) name filter. * @param dbName Database name. @@ -2646,6 +2653,36 @@ protected String describeResult() { } } + @Override + public int getNumPartitionsByFilter(String dbName, String tblName, + String filter) throws MetaException, NoSuchObjectException { + return getNumPartitionsByFilterInternal(dbName, tblName, filter, + true, true); + } + + protected int getNumPartitionsByFilterInternal(String dbName, String tblName, + String filter, boolean allowSql, boolean allowJdo) + throws MetaException, NoSuchObjectException { + final ExpressionTree tree = (filter != null && !filter.isEmpty()) + ? 
PartFilterExprUtil.getFilterParser(filter).tree : ExpressionTree.EMPTY_TREE; + return new GetHelper(dbName, tblName, allowSql, allowJdo) { + @Override + protected String describeResult() { + return null; + } + + @Override + protected Integer getSqlResult(GetHelper ctx) throws MetaException { + return directSql.getNumPartitionsViaSqlFilter(ctx.getTable(), tree); + } + @Override + protected Integer getJdoResult( + GetHelper ctx) throws MetaException, NoSuchObjectException { + return getNumPartitionsViaOrmFilter(ctx.getTable(), tree, true); + } + }.run(true); + } + protected List getPartitionsByFilterInternal(String dbName, String tblName, String filter, final short maxParts, boolean allowSql, boolean allowJdo) throws MetaException, NoSuchObjectException { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java index e118a3b..4e67b57 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -223,6 +223,9 @@ public abstract boolean getPartitionsByExpr(String dbName, String tblName, byte[] expr, String defaultPartitionName, short maxParts, List result) throws TException; + public abstract int getNumPartitionsByFilter(String dbName, String tblName, String filter) + throws MetaException, NoSuchObjectException; + public abstract List getPartitionsByNames( String dbName, String tblName, List partNames) throws MetaException, NoSuchObjectException; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java index b9509ab..0bb6df1 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java @@ -758,6 +758,21 @@ public boolean getPartitionsByExpr(String dbName, String tblName, byte[] expr, 
} } + @Override + public int getNumPartitionsByFilter(String dbName, String tblName, String filter) + throws MetaException, NoSuchObjectException { + final ExpressionTree exprTree = (filter != null && !filter.isEmpty()) ? PartFilterExprUtil + .getFilterParser(filter).tree : ExpressionTree.EMPTY_TREE; + List result = new ArrayList(); + boolean commit = false; + openTransaction(); + try { + return getPartitionsByFilter(dbName, tblName, filter, Short.MAX_VALUE).size(); + } finally { + commitOrRoleBack(commit); + } + } + /** * Gets the partition names from a table, pruned using an expression. * @param table Table. diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index c1156b3..f7e9214 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -310,6 +310,12 @@ public void alterIndex(String dbName, String baseTblName, String name, Index new } @Override + public int getNumPartitionsByFilter(String dbName, String tblName, + String filter) throws MetaException, NoSuchObjectException { + return objectStore.getNumPartitionsByFilter(dbName, tblName, filter); + } + + @Override public List getPartitionsByNames(String dbName, String tblName, List partNames) throws MetaException, NoSuchObjectException { return objectStore.getPartitionsByNames(dbName, tblName, partNames); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index bf20e99..8bc863a 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -341,6 +341,12 @@ public boolean 
getPartitionsByExpr(String dbName, String tblName, byte[] expr, } @Override + public int getNumPartitionsByFilter(String dbName, String tblName, String filter) + throws MetaException, NoSuchObjectException { + return -1; + } + + @Override public Table markPartitionForEvent(String dbName, String tblName, Map partVals, PartitionEventType evtType) throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java index 9089d1c..117762a 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.junit.After; import org.junit.Assert; @@ -158,8 +159,8 @@ public void testPartitionOps() throws MetaException, InvalidObjectException, NoS StorageDescriptor sd = new StorageDescriptor(null, "location", null, null, false, 0, new SerDeInfo("SerDeName", "serializationLib", null), null, null, null); HashMap tableParams = new HashMap(); tableParams.put("EXTERNAL", "false"); - FieldSchema partitionKey1 = new FieldSchema("Country", "String", ""); - FieldSchema partitionKey2 = new FieldSchema("State", "String", ""); + FieldSchema partitionKey1 = new FieldSchema("Country", serdeConstants.STRING_TYPE_NAME, ""); + FieldSchema partitionKey2 = new FieldSchema("State", serdeConstants.STRING_TYPE_NAME, ""); Table tbl1 = new Table(TABLE1, DB1, "owner", 1, 2, 3, sd, Arrays.asList(partitionKey1, partitionKey2), tableParams, "viewOriginalText", "viewExpandedText", "MANAGED_TABLE"); 
objectStore.createTable(tbl1); HashMap partitionParams = new HashMap(); @@ -177,6 +178,12 @@ public void testPartitionOps() throws MetaException, InvalidObjectException, NoS Assert.assertEquals(111, partitions.get(0).getCreateTime()); Assert.assertEquals(222, partitions.get(1).getCreateTime()); + int numPartitions = objectStore.getNumPartitionsByFilter(DB1, TABLE1, ""); + Assert.assertEquals(partitions.size(), numPartitions); + + numPartitions = objectStore.getNumPartitionsByFilter(DB1, TABLE1, "country = \"US\""); + Assert.assertEquals(2, numPartitions); + objectStore.dropPartition(DB1, TABLE1, value1); partitions = objectStore.getPartitions(DB1, TABLE1, 10); Assert.assertEquals(1, partitions.size()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index c682df2..d050e6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2373,6 +2373,30 @@ public boolean getPartitionsByExpr(Table tbl, ExprNodeGenericFuncDesc expr, Hive return hasUnknownParts; } + /** + * Get a number of Partitions by filter. + * @param tbl The table containing the partitions. + * @param filter A string represent partition predicates. + * @return the number of partitions satisfying the partition predicates. 
+ * @throws HiveException + * @throws MetaException + * @throws NoSuchObjectException + * @throws TException + */ + public int getNumPartitionsByFilter(Table tbl, String filter) + throws HiveException, MetaException, NoSuchObjectException, TException { + + if (!tbl.isPartitioned()) { + throw new HiveException("Partition spec should only be supplied for a " + + "partitioned table"); + } + + int numParts = getMSC().getNumPartitionsByFilter( + tbl.getDbName(), tbl.getTableName(), filter); + + return numParts; + } + public void validatePartitionNameCharacters(List partVals) throws HiveException { try { getMSC().validatePartitionNameCharacters(partVals);