diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index e7694b7..0448cb5 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -92,6 +92,11 @@ * use that. MySQL detection is done by actually issuing the set-ansi-quotes command. */ private final boolean isMySql; + /** + * Retrieve partition objects in batch. This is a workaround for Oracle + * ORA-01795: maximum number of expressions in a list is 1000 + */ + private int batchSize = 1000; /** * Whether direct SQL can be used with the current datastore backing {@link #pm}. @@ -187,9 +192,21 @@ private void trySetAnsiQuotesForMysql() throws SQLException { if (partNames.isEmpty()) { return new ArrayList(); } - return getPartitionsViaSqlFilterInternal(dbName, tblName, null, "\"PARTITIONS\".\"PART_NAME\" in (" + makeParams(partNames.size()) + ")", partNames, new ArrayList(), max); + // apply max; clamp to list size to avoid IndexOutOfBoundsException when max > partNames.size() + if (max != null && max != -1 && max != 0) { + partNames = partNames.subList(0, Math.min(max, partNames.size())); + } + // retrieve partition objects in batch + List result = new ArrayList(partNames.size()); + for (int index = 0; index < partNames.size(); index += batchSize) { + int toIndex = index + batchSize < partNames.size() ? index + batchSize : partNames.size(); + List batchedPartNames = partNames.subList(index, toIndex); + result.addAll(getPartitionsViaSqlFilterInternal(dbName, tblName, null, + "\"PARTITIONS\".\"PART_NAME\" in (" + makeParams(batchedPartNames.size()) + ")", batchedPartNames, new ArrayList(), + max)); + } + LOG.info("Number of partitions obtained via directSQL through partition names:" + (result == null ? 
0 : result.size())); + return result; } /** @@ -305,17 +322,31 @@ private boolean isViewTable(String dbName, String tblName) throws MetaException } // Prepare StringBuilder for "PART_ID in (...)" to use in future queries. - int sbCapacity = sqlResult.size() * 7; // if there are 100k things => 6 chars, plus comma - StringBuilder partSb = new StringBuilder(sbCapacity); - // Assume db and table names are the same for all partition, that's what we're selecting for. - for (Object partitionId : sqlResult) { - partSb.append(StatObjectConverter.extractSqlLong(partitionId)).append(","); + int sbCapacity = batchSize * 7; // if there are 100k things => 6 chars, plus comma + List result = new ArrayList(); + for (int index = 0; index < sqlResult.size(); index += batchSize) { + int toIndex = index + batchSize < sqlResult.size() ? index + batchSize : sqlResult.size(); + List batchedSqlResult = sqlResult.subList(index, toIndex); + // Assume db and table names are the same for all partition, that's what we're selecting for. + StringBuilder partSb = new StringBuilder(sbCapacity); + for (Object partitionId : batchedSqlResult) { + partSb.append(StatObjectConverter.extractSqlLong(partitionId)).append(","); + } + String partIds = trimCommaList(partSb); + // Assume db and table names are the same for all partition, that's what we're selecting for. + result.addAll(getPartitionsViaSqlFilterInternalInBatch(tblName, dbName, isView, partIds, params, batchSize)); } - String partIds = trimCommaList(partSb); timingTrace(doTrace, queryText, start, queryTime); + query.close(sqlResult); + return result; + } + + private List getPartitionsViaSqlFilterInternalInBatch(String tblName, String dbName, + Boolean isView, String partIds, Object[] params, int batchSize) throws MetaException { + boolean doTrace = LOG.isDebugEnabled(); // Now get most of the other fields. 
- queryText = + String queryText = "select \"PARTITIONS\".\"PART_ID\", \"SDS\".\"SD_ID\", \"SDS\".\"CD_ID\"," + " \"SERDES\".\"SERDE_ID\", \"PARTITIONS\".\"CREATE_TIME\"," + " \"PARTITIONS\".\"LAST_ACCESS_TIME\", \"SDS\".\"INPUT_FORMAT\", \"SDS\".\"IS_COMPRESSED\"," @@ -325,11 +356,11 @@ private boolean isViewTable(String dbName, String tblName) throws MetaException + " left outer join \"SDS\" on \"PARTITIONS\".\"SD_ID\" = \"SDS\".\"SD_ID\" " + " left outer join \"SERDES\" on \"SDS\".\"SERDE_ID\" = \"SERDES\".\"SERDE_ID\" " + "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc"; - start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); + long start = doTrace ? System.nanoTime() : 0; + Query query = pm.newQuery("javax.jdo.query.SQL", queryText); @SuppressWarnings("unchecked") List sqlResult2 = (List)query.executeWithArray(params); - queryTime = doTrace ? System.nanoTime() : 0; + long queryTime = doTrace ? System.nanoTime() : 0; // Read all the fields and create partitions, SDs and serdes. TreeMap partitions = new TreeMap(); @@ -337,9 +368,10 @@ private boolean isViewTable(String dbName, String tblName) throws MetaException TreeMap serdes = new TreeMap(); TreeMap> colss = new TreeMap>(); // Keep order by name, consistent with JDO. - ArrayList orderedResult = new ArrayList(sqlResult.size()); + ArrayList orderedResult = new ArrayList(batchSize); // Prepare StringBuilder-s for "in (...)" lists to use in one-to-many queries. + int sbCapacity = batchSize * 7; StringBuilder sdSb = new StringBuilder(sbCapacity), serdeSb = new StringBuilder(sbCapacity); StringBuilder colsSb = new StringBuilder(7); // We expect that there's only one field schema. 
tblName = tblName.toLowerCase(); @@ -596,6 +628,7 @@ public void apply(SerDeInfo t, Object[] fields) { t.putToParameters((String)fields[1], (String)fields[2]); }}); + LOG.info("Number of partitions obtained via directSQL in batch:" + orderedResult.size()); return orderedResult; } @@ -846,7 +879,7 @@ public void visit(LeafNode node) throws MetaException { } // This is a workaround for DERBY-6358; as such, it is pretty horrible. - tableValue = "(case when \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? then " + tableValue = "(case when \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? and " + "\"FILTER" + partColIndex + "\".\"PART_ID\" = \"PARTITIONS\".\"PART_ID\" then " + tableValue + " else null end)"; params.add(table.getTableName().toLowerCase()); params.add(table.getDbName().toLowerCase());