Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Fixed
-
Impala 2.8.0
Description
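When running "ALTER TABLE foo RECOVER PARTITIONS" against a table with a large number of partitions, performance is very poor because an ArrayList is used to check whether each partition already exists. Every such check is a linear scan of the list that compares partition value lists one by one, so the cost of each lookup grows with the number of partitions already known.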
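java.util.ArrayList.contains(Object) ends up consuming the vast majority of the CPU (about 99% of the samples in the profile below). The relevant code and the profiler output follow.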
private void getAllPartitionsNotInHms(Path path, List<String> partitionKeys, int depth, FileSystem fs, List<String> partitionValues, List<LiteralExpr> partitionExprs, List<List<LiteralExpr>> existingPartitions, List<List<String>> partitionsNotInHms) throws IOException { if (depth == partitionKeys.size()) { if (existingPartitions.contains(partitionExprs)) { if (LOG.isTraceEnabled()) { LOG.trace(String.format("Skip recovery of path '%s' because it already " + "exists in metastore", path.toString())); } } else { partitionsNotInHms.add(partitionValues); existingPartitions.add(partitionExprs); } return; }
Stack Trace | Sample Count | Percentage(%)
org.apache.impala.service.JniCatalog.execDdl(byte[]) | 25,561 | 99.98
org.apache.impala.service.CatalogOpExecutor.execDdlRequest(TDdlExecRequest) | 25,561 | 99.98
org.apache.impala.service.CatalogOpExecutor.alterTable(TAlterTableParams, TDdlExecResponse) | 25,561 | 99.98
org.apache.impala.service.CatalogOpExecutor.alterTableRecoverPartitions(Table) | 25,561 | 99.98
org.apache.impala.catalog.HdfsTable.getPathsWithoutPartitions() | 25,561 | 99.98
org.apache.impala.catalog.HdfsTable.getAllPartitionsNotInHms(Path, List, List, List) | 25,561 | 99.98
org.apache.impala.catalog.HdfsTable.getAllPartitionsNotInHms(Path, List, int, FileSystem, List, List, List, List) | 25,561 | 99.98
org.apache.impala.catalog.HdfsTable.getAllPartitionsNotInHms(Path, List, int, FileSystem, List, List, List, List) | 25,561 | 99.98
org.apache.impala.catalog.HdfsTable.getAllPartitionsNotInHms(Path, List, int, FileSystem, List, List, List, List) | 25,427 | 99.456
java.util.ArrayList.contains(Object) | 25,334 | 99.093
java.util.ArrayList.indexOf(Object) | 25,334 | 99.093
java.util.AbstractList.equals(Object) | 24,755 | 96.828
java.util.ArrayList.listIterator() | 8,190 | 32.035
java.util.ArrayList$ListItr.<init>(ArrayList, int) | 8,184 | 32.011
java.util.ArrayList$Itr.<init>(ArrayList, ArrayList$1) | 8,184 | 32.011
java.util.ArrayList$Itr.<init>(ArrayList) | 8,184 | 32.011