diff --git a/ql/src/test/queries/clientpositive/alter_rename_table.q b/ql/src/test/queries/clientpositive/alter_rename_table.q
index 53fb230cf6..bcf6ad563b 100644
--- a/ql/src/test/queries/clientpositive/alter_rename_table.q
+++ b/ql/src/test/queries/clientpositive/alter_rename_table.q
@@ -36,4 +36,14 @@ create table source.src1 like default.src;
 create table source.src1 like default.src;
 load data local inpath '../../data/files/kv1.txt' overwrite into table source.src;
 ALTER TABLE source.src RENAME TO target.src1;
-select * from target.src1 tablesample (10 rows);
\ No newline at end of file
+select * from target.src1 tablesample (10 rows);
+
+set metastore.rawstore.batch.size=1;
+set metastore.try.direct.sql=false;
+
+create table source.src2 like default.src;
+load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2;
+ANALYZE TABLE source.src2 COMPUTE STATISTICS FOR COLUMNS;
+ALTER TABLE source.src2 RENAME TO target.src3;
+DESC FORMATTED target.src3;
+select * from target.src3 tablesample (10 rows);
diff --git a/ql/src/test/results/clientpositive/alter_rename_table.q.out b/ql/src/test/results/clientpositive/alter_rename_table.q.out
index 732d8a28d8..9ac8fd2f9b 100644
--- a/ql/src/test/results/clientpositive/alter_rename_table.q.out
+++ b/ql/src/test/results/clientpositive/alter_rename_table.q.out
@@ -261,3 +261,91 @@ POSTHOOK: Input: target@src1
 278	val_278
 98	val_98
 484	val_484
+PREHOOK: query: create table source.src2 like default.src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:source
+PREHOOK: Output: source@src2
+POSTHOOK: query: create table source.src2 like default.src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:source
+POSTHOOK: Output: source@src2
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ANALYZE TABLE source.src2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: source@src2
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: ANALYZE TABLE source.src2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: source@src2
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: source@src2
+PREHOOK: Output: source@src2
+POSTHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: source@src2
+POSTHOOK: Output: source@src2
+POSTHOOK: Output: target@src3
+PREHOOK: query: DESC FORMATTED target.src3
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: target@src3
+POSTHOOK: query: DESC FORMATTED target.src3
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: target@src3
+# col_name            	data_type           	comment
+key                 	string              	default
+value               	string              	default
+
+# Detailed Table Information
+Database:           	target
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+#### A masked pattern was here ####
+	numFiles            	1
+	numRows             	500
+	rawDataSize         	5312
+	totalSize           	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
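Note on the test above: pinning metastore.rawstore.batch.size to 1 and turning off metastore.try.direct.sql forces the column statistics copied by ALTER TABLE ... RENAME through the JDO (ObjectStore) path in the smallest possible batches, one column per query; the DESC FORMATTED output then confirms the statistics survived the rename. A sketch of how this batching behaves follows the new Batchable class below.
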
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:         	No
+Num Buckets:        	-1
+Bucket Columns:     	[]
+Sort Columns:       	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: select * from target.src3 tablesample (10 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: target@src3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target.src3 tablesample (10 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: target@src3
+#### A masked pattern was here ####
+238	val_238
+86	val_86
+311	val_311
+27	val_27
+165	val_165
+409	val_409
+255	val_255
+278	val_278
+98	val_98
+484	val_484
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
new file mode 100644
index 0000000000..7e488a5b3a
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.List;
+import javax.jdo.Query;
+
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class for running DirectSQL or RawStore queries in batches:
+ * 1. Provide an implementation of run() that processes one batch.
+ * 2. Call Batchable.runBatched() to process the whole dataset.
+ *
+ * I: input type, R: result type.
+ */
+public abstract class Batchable<I, R> {
+  private static final Logger LOG = LoggerFactory.getLogger(Batchable.class);
+  public static final int NO_BATCHING = -1;
+
+  private List<Query> queries = null;
+
+  public abstract List<R> run(List<I> input) throws MetaException;
+
+  public void addQueryAfterUse(Query query) {
+    if (queries == null) {
+      queries = new ArrayList<>(1);
+    }
+    queries.add(query);
+  }
+
+  protected void addQueryAfterUse(Batchable<?, ?> b) {
+    if (b.queries == null) {
+      return;
+    }
+    if (queries == null) {
+      queries = new ArrayList<>(1);
+    }
+    queries.addAll(b.queries);
+  }
+
+  public void closeAllQueries() {
+    if (queries == null) {
+      // Nothing was registered via addQueryAfterUse(); avoid an NPE from a finally block.
+      return;
+    }
+    for (Query q : queries) {
+      try {
+        q.closeAll();
+      } catch (Throwable t) {
+        LOG.error("Failed to close a query", t);
+      }
+    }
+  }
+
+  public static <I, R> List<R> runBatched(
+      final int batchSize,
+      List<I> input,
+      Batchable<I, R> runnable) throws MetaException {
+    if (batchSize == NO_BATCHING || batchSize >= input.size()) {
+      return runnable.run(input);
+    }
+    List<R> result = new ArrayList<>(input.size());
+    for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) {
+      toIndex = Math.min(fromIndex + batchSize, input.size());
+      List<I> batchedInput = input.subList(fromIndex, toIndex);
+      List<R> batchedOutput = runnable.run(batchedInput);
+      if (batchedOutput != null) {
+        result.addAll(batchedOutput);
+      }
+    }
+    return result;
+  }
+}
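A minimal, self-contained sketch of how a caller drives the class above (the demo class and its strings are made up for illustration); with the test's batch size of 1, run() is invoked once per input element:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    import org.apache.hadoop.hive.metastore.Batchable;
    import org.apache.hadoop.hive.metastore.api.MetaException;

    public class BatchableDemo {
      public static void main(String[] args) throws MetaException {
        List<String> colNames = Arrays.asList("key", "value");
        // batchSize=1 (as in the .q test): run() is called once per column,
        // first with ["key"] and then with ["value"].
        List<String> stats = Batchable.runBatched(1, colNames, new Batchable<String, String>() {
          @Override
          public List<String> run(List<String> input) {
            return Collections.singletonList("stats(" + input.get(0) + ")");
          }
        });
        System.out.println(stats); // [stats(key), stats(value)]
      }
    }
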
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 997f5fdb88..4e0e887a99 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -455,7 +455,7 @@ public Database getDatabase(String catName, String dbName) throws MetaException{
     if (partNames.isEmpty()) {
       return Collections.emptyList();
     }
-    return runBatched(partNames, new Batchable<String, Partition>() {
+    return Batchable.runBatched(batchSize, partNames, new Batchable<String, Partition>() {
       @Override
       public List<Partition> run(List<String> input) throws MetaException {
         String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")";
@@ -596,7 +596,7 @@ private boolean isViewTable(String catName, String dbName, String tblName) throw
     }
 
     // Get full objects. For Oracle/etc. do it in batches.
-    List<Partition> result = runBatched(sqlResult, new Batchable<Long, Partition>() {
+    List<Partition> result = Batchable.runBatched(batchSize, sqlResult, new Batchable<Long, Partition>() {
       @Override
       public List<Partition> run(List<Long> input) throws MetaException {
         return getPartitionsFromPartitionIds(catNameLcase, dbNameLcase, tblNameLcase, isView,
@@ -1374,7 +1374,7 @@ public ColumnStatistics getTableStats(final String catName, final String dbName,
         return ensureList(qResult);
       }
     };
-    List<Object[]> list = runBatched(colNames, b);
+    List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
     if (list.isEmpty()) {
       return null;
     }
@@ -1460,10 +1460,10 @@ private long partsFoundForPartitions(
         + " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
         + " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)"
        + " group by \"PARTITION_NAME\"";
-    List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {
+    List<Long> allCounts = Batchable.runBatched(batchSize, colNames, new Batchable<String, Long>() {
       @Override
       public List<Long> run(final List<String> inputColName) throws MetaException {
-        return runBatched(partNames, new Batchable<String, Long>() {
+        return Batchable.runBatched(batchSize, partNames, new Batchable<String, Long>() {
           @Override
           public List<Long> run(List<String> inputPartNames) throws MetaException {
             long partsFound = 0;
@@ -1503,10 +1503,10 @@ private long partsFoundForPartitions(
       final String tableName, final List<String> partNames, List<String> colNames, long partsFound,
       final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
     final boolean areAllPartsFound = (partsFound == partNames.size());
-    return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
+    return Batchable.runBatched(batchSize, colNames, new Batchable<String, ColumnStatisticsObj>() {
       @Override
       public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
-        return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
+        return Batchable.runBatched(batchSize, partNames, new Batchable<String, ColumnStatisticsObj>() {
          @Override
           public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
             return columnStatisticsObjForPartitionsBatch(catName, dbName, tableName, inputPartNames,
@@ -1918,13 +1918,13 @@ private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i,
           }
         };
         try {
-          return runBatched(partNames, b2);
+          return Batchable.runBatched(batchSize, partNames, b2);
         } finally {
           addQueryAfterUse(b2);
         }
       }
     };
-    List<Object[]> list = runBatched(colNames, b);
+    List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
 
     List<ColumnStatisticsObj> result = new ArrayList<>(
         Math.min(list.size(), partNames.size()));
@@ -2027,49 +2027,6 @@ public void prepareTxn() throws MetaException {
   }
 
-  private static abstract class Batchable<I, R> {
-    private List<Query> queries = null;
-    public abstract List<R> run(List<I> input) throws MetaException;
-    public void addQueryAfterUse(Query query) {
-      if (queries == null) {
-        queries = new ArrayList<Query>(1);
-      }
-      queries.add(query);
-    }
-    protected void addQueryAfterUse(Batchable<?, ?> b) {
-      if (b.queries == null) return;
-      if (queries == null) {
-        queries = new ArrayList<Query>(1);
-      }
-      queries.addAll(b.queries);
-    }
-    public void closeAllQueries() {
-      for (Query q : queries) {
-        try {
-          q.closeAll();
-        } catch (Throwable t) {
-          LOG.error("Failed to close a query", t);
-        }
-      }
-    }
-  }
-
-  private <I, R> List<R> runBatched(List<I> input, Batchable<I, R> runnable) throws MetaException {
-    if (batchSize == NO_BATCHING || batchSize >= input.size()) {
-      return runnable.run(input);
-    }
-    List<R> result = new ArrayList<>(input.size());
-    for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) {
-      toIndex = Math.min(fromIndex + batchSize, input.size());
-      List<I> batchedInput = input.subList(fromIndex, toIndex);
-      List<R> batchedOutput = runnable.run(batchedInput);
-      if (batchedOutput != null) {
-        result.addAll(batchedOutput);
-      }
-    }
-    return result;
-  }
-
   public List<SQLForeignKey> getForeignKeys(String catName, String parent_db_name,
       String parent_tbl_name, String foreign_db_name, String foreign_tbl_name)
       throws MetaException {
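The two aggregate-stats call sites above batch on both axes: the outer runBatched() slices column names and the inner one slices partition names, so no single statement sees more than batchSize entries per IN clause. A runnable sketch of that nesting, with a hypothetical queryOnce() standing in for the real SQL:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    import org.apache.hadoop.hive.metastore.Batchable;
    import org.apache.hadoop.hive.metastore.api.MetaException;

    public class NestedBatchDemo {
      // Hypothetical stand-in for one aggregate-stats SQL statement.
      static List<Long> queryOnce(List<String> cols, List<String> parts) {
        System.out.println(cols.size() + " cols x " + parts.size() + " parts -> one statement");
        return Collections.singletonList((long) cols.size() * parts.size());
      }

      public static void main(String[] args) throws MetaException {
        final int batchSize = 2;
        final List<String> colNames = Arrays.asList("key", "value", "ds");
        final List<String> partNames = Arrays.asList("p1", "p2", "p3", "p4", "p5");
        // Outer call slices columns, inner call slices partitions: with batchSize=2
        // this issues ceil(3/2) * ceil(5/2) = 6 small statements instead of one giant one.
        List<Long> counts = Batchable.runBatched(batchSize, colNames, new Batchable<String, Long>() {
          @Override
          public List<Long> run(final List<String> cols) throws MetaException {
            return Batchable.runBatched(batchSize, partNames, new Batchable<String, Long>() {
              @Override
              public List<Long> run(List<String> parts) {
                return queryOnce(cols, parts);
              }
            });
          }
        });
        System.out.println(counts.size() + " partial results to aggregate");
      }
    }
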
" + " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)" + " group by \"PARTITION_NAME\""; - List allCounts = runBatched(colNames, new Batchable() { + List allCounts = Batchable.runBatched(batchSize, colNames, new Batchable() { @Override public List run(final List inputColName) throws MetaException { - return runBatched(partNames, new Batchable() { + return Batchable.runBatched(batchSize, partNames, new Batchable() { @Override public List run(List inputPartNames) throws MetaException { long partsFound = 0; @@ -1503,10 +1503,10 @@ private long partsFoundForPartitions( final String tableName, final List partNames, List colNames, long partsFound, final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException { final boolean areAllPartsFound = (partsFound == partNames.size()); - return runBatched(colNames, new Batchable() { + return Batchable.runBatched(batchSize, colNames, new Batchable() { @Override public List run(final List inputColNames) throws MetaException { - return runBatched(partNames, new Batchable() { + return Batchable.runBatched(batchSize, partNames, new Batchable() { @Override public List run(List inputPartNames) throws MetaException { return columnStatisticsObjForPartitionsBatch(catName, dbName, tableName, inputPartNames, @@ -1918,13 +1918,13 @@ private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i, } }; try { - return runBatched(partNames, b2); + return Batchable.runBatched(batchSize, partNames, b2); } finally { addQueryAfterUse(b2); } } }; - List list = runBatched(colNames, b); + List list = Batchable.runBatched(batchSize, colNames, b); List result = new ArrayList( Math.min(list.size(), partNames.size())); @@ -2027,49 +2027,6 @@ public void prepareTxn() throws MetaException { } - private static abstract class Batchable { - private List queries = null; - public abstract List run(List input) throws MetaException; - public void addQueryAfterUse(Query query) { - if (queries == null) { - queries = new ArrayList(1); - } - queries.add(query); - } - protected void addQueryAfterUse(Batchable b) { - if (b.queries == null) return; - if (queries == null) { - queries = new ArrayList(1); - } - queries.addAll(b.queries); - } - public void closeAllQueries() { - for (Query q : queries) { - try { - q.closeAll(); - } catch (Throwable t) { - LOG.error("Failed to close a query", t); - } - } - } - } - - private List runBatched(List input, Batchable runnable) throws MetaException { - if (batchSize == NO_BATCHING || batchSize >= input.size()) { - return runnable.run(input); - } - List result = new ArrayList(input.size()); - for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) { - toIndex = Math.min(fromIndex + batchSize, input.size()); - List batchedInput = input.subList(fromIndex, toIndex); - List batchedOutput = runnable.run(batchedInput); - if (batchedOutput != null) { - result.addAll(batchedOutput); - } - } - return result; - } - public List getForeignKeys(String catName, String parent_db_name, String parent_tbl_name, String foreign_db_name, String foreign_tbl_name) throws MetaException { diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 125d5a79f2..5006970a23 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ 
@@ -8005,25 +8008,33 @@ public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List<
-      List<MTableColumnStatistics> result = null;
       validateTableCols(table, colNames);
       Query query = queryWrapper.query = pm.newQuery(MTableColumnStatistics.class);
-      String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
-      String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
-      Object[] params = new Object[colNames.size() + 3];
-      params[0] = table.getTableName();
-      params[1] = table.getDbName();
-      params[2] = table.getCatName();
-      for (int i = 0; i < colNames.size(); ++i) {
-        filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
-        paramStr += ", java.lang.String c" + i;
-        params[i + 3] = colNames.get(i);
-      }
-      filter += ")";
-      query.setFilter(filter);
-      query.declareParameters(paramStr);
-      result = (List<MTableColumnStatistics>) query.executeWithArray(params);
-      pm.retrieveAll(result);
+      List<MTableColumnStatistics> result =
+          Batchable.runBatched(batchSize, colNames, new Batchable<String, MTableColumnStatistics>() {
+            @Override
+            public List<MTableColumnStatistics> run(List<String> input) throws MetaException {
+              String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
+              String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+              Object[] params = new Object[input.size() + 3];
+              params[0] = table.getTableName();
+              params[1] = table.getDbName();
+              params[2] = table.getCatName();
+              for (int i = 0; i < input.size(); ++i) {
+                filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
+                paramStr += ", java.lang.String c" + i;
+                params[i + 3] = input.get(i);
+              }
+              filter += ")";
+              query.setFilter(filter);
+              query.declareParameters(paramStr);
+              List<MTableColumnStatistics> partial =
+                  (List<MTableColumnStatistics>) query.executeWithArray(params);
+              pm.retrieveAll(partial);
+              return partial;
+            }
+          });
+
       if (result.size() > colNames.size()) {
         throw new MetaException("Unexpected " + result.size() + " statistics for "
             + colNames.size() + " columns");
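To see why batching matters on this path: the loop above builds one JDOQL colName == cN clause per column, and DataNucleus must parse and translate the whole filter into SQL, so the query text grows without bound when thousands of columns are requested at once. A standalone sketch of the string assembled for a two-column batch:

    import java.util.Arrays;
    import java.util.List;

    public class FilterDemo {
      public static void main(String[] args) {
        List<String> input = Arrays.asList("key", "value");
        String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
        String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
        for (int i = 0; i < input.size(); ++i) {
          filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
          paramStr += ", java.lang.String c" + i;
        }
        filter += ")";
        // Prints:
        // tableName == t1 && dbName == t2 && catName == t3 && (colName == c0 || colName == c1)
        System.out.println(filter);
      }
    }
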
"" : " || ") + "colName == c" + i; + paramStr += ", java.lang.String c" + i; + params[i + 3] = input.get(i); + } + filter += ")"; + query.setFilter(filter); + query.declareParameters(paramStr); + List paritial = (List) query.executeWithArray(params); + pm.retrieveAll(paritial); + return paritial; + } + }); + if (result.size() > colNames.size()) { throw new MetaException("Unexpected " + result.size() + " statistics for " + colNames.size() + " columns"); diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 59749e4947..4d9967902d 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -846,6 +846,11 @@ public static ConfVars getMetaConf(String name) { "hive.metastore.wm.default.pool.size", 4, "The size of a default pool to create when creating an empty resource plan;\n" + "If not positive, no default pool will be created."), + RAWSTORE_PARTITION_BATCH_SIZE("metastore.rawstore.batch.size", + "metastore.rawstore.batch.size", -1, + "Batch size for partition and other object retrieval from the underlying DB in JDO.\n" + + "The JDO implementation such as DataNucleus may run into issues when the generated queries are\n" + + "too large. Use this parameter to break the query into multiple batches. -1 means no batching."), // Hive values we have copied and use as is // These two are used to indicate that we are running tests