From cd9b3bec634b54d8ff110d579fb71ce49e3790d6 Mon Sep 17 00:00:00 2001 From: Alexander Kolbasov Date: Tue, 17 Jul 2018 23:36:50 -0700 Subject: [PATCH] HIVE-19902: Provide Metastore micro-benchmarks --- .../metastore-benchmarks/README.md | 128 ++++ .../metastore-benchmarks/pom.xml | 164 ++++++ .../hive/metastore/tools/BenchData.java | 41 ++ .../hive/metastore/tools/BenchmarkTool.java | 255 ++++++++ .../hive/metastore/tools/HMSBenchmarks.java | 447 ++++++++++++++ .../src/main/resources/log4j.properties | 6 + .../src/main/resources/log4j2.xml | 33 ++ standalone-metastore/metastore-tools/pom.xml | 135 +++++ .../metastore-tools/tools-common/pom.xml | 113 ++++ .../hive/metastore/tools/BenchmarkSuite.java | 266 +++++++++ .../hive/metastore/tools/Constants.java | 33 ++ .../hive/metastore/tools/HMSClient.java | 428 ++++++++++++++ .../hive/metastore/tools/MicroBenchmark.java | 123 ++++ .../hadoop/hive/metastore/tools/Util.java | 554 ++++++++++++++++++ .../hive/metastore/tools/HMSClientTest.java | 206 +++++++ .../hadoop/hive/metastore/tools/UtilTest.java | 81 +++ standalone-metastore/pom.xml | 9 + 17 files changed, 3022 insertions(+) create mode 100644 standalone-metastore/metastore-tools/metastore-benchmarks/README.md create mode 100644 standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml create mode 100644 standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java create mode 100644 standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java create mode 100644 standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java create mode 100644 standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties create mode 100644 standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml create mode 100644 standalone-metastore/metastore-tools/pom.xml create mode 100644 standalone-metastore/metastore-tools/tools-common/pom.xml create mode 100644 standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java create mode 100644 standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java create mode 100644 standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java create mode 100644 standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java create mode 100644 standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java create mode 100644 standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java create mode 100644 standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/README.md b/standalone-metastore/metastore-tools/metastore-benchmarks/README.md new file mode 100644 index 0000000000..fdbb94612d --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/README.md @@ -0,0 +1,128 @@ +## Hive Metastore micro-benchmarks + +## Installation + + mvn clean install + +You can run tests as well. 
Just set the `HMS_HOST` environment variable to an HMS instance that is
capable of serving your requests (a non-kerberized one) and run

    mvn install

The target directory then contains two self-contained jars that bundle all the dependencies.

Alternatively, you can use the [bin/hbench](../bin/hbench) script, which uses Maven to run the code.

## HmsBench usage

    Usage: BenchmarkTool [-ChlV] [--sanitize] [--confdir=<confDir>]
                         [--params=<nParameters>] [--savedata=<dataSaveDir>]
                         [--separator=<csvSeparator>] [-d=<dbName>] [-H=URI]
                         [-L=<spinCount>] [-N=<instances>] [-o=<outputFile>]
                         [-P=<port>] [-t=<tableName>] [-T=<nThreads>] [-W=<warmup>]
                         [-E=<exclude>]... [-M=<matches>]...
          --confdir=<confDir>     configuration directory
          --params=<nParameters>  number of table/partition parameters
                                    Default: 0
          --sanitize              sanitize results (remove outliers)
          --savedata=<dataSaveDir>
                                  save raw data in specified dir
          --separator=<csvSeparator>
                                  CSV field separator
                                    Default:
      -C, --csv                   produce CSV output
      -d, --db=<dbName>           database name
      -E, --exclude=<exclude>     test name patterns to exclude
      -h, --help                  Show this help message and exit.
      -H, --host=URI              HMS Host
      -l, --list                  list matching benchmarks
      -L, --spin=<spinCount>      spin count
                                    Default: 100
      -M, --pattern=<matches>     test name patterns
      -N, --number=<instances>    number of object instances
                                    Default: 100
      -o, --output=<outputFile>   output file
      -P, --port=<port>           HMS Server port
                                    Default: 9083
      -t, --table=<tableName>     table name
      -T, --threads=<nThreads>    number of concurrent threads
                                    Default: 2
      -V, --version               Print version information and exit.
      -W, --warmup=<warmup>       warmup count
                                    Default: 15

### Using single jar

    java -jar hbench-jar-with-dependencies.jar [test]...

### Using hbench on a kerberized cluster

    java -jar hbench-jar-with-dependencies.jar -H `hostname` [test]...

### Examples

1. Run tests with 500 object instances and 10 warm-up cycles, excluding drop and concurrent operations

       java -jar hmsbench-jar-with-dependencies.jar -H `hostname` -N 500 -W 10 -E 'drop.*' -E 'concurrent.*'

2. Run tests, produce output in tab-separated format and write individual data points into the 'data' directory

       java -jar hmsbench-jar-with-dependencies.jar -H host.com -o result.csv --csv --savedata data

3. 
Run tests on localhost + * save raw data in directory /tmp/benchdata + * sanitize results (remove outliers) + * produce tab-separated file + * use table name 'testbench' + * create 100 parameters in partition tests + * run with 100 and thousand partitions + + + java -jar hmsbench-jar-with-dependencies.jar -H `hostname` \ + --savedata /tmp/benchdata \ + --sanitize \ + -N 100 -N 1000 \ + -o bench_results.csv -C \ + -d testbench \ + --params=100 + +Result: + + Operation Mean Med Min Max Err% + addPartition 16.97 16.89 13.84 27.10 8.849 + addPartitions.100 315.9 313.7 274.2 387.0 6.485 + addPartitions.1000 3016 3017 2854 3226 2.861 + concurrentPartitionAdd#2.100 1289 1289 1158 1434 4.872 + concurrentPartitionAdd#2.1000 1.221e+04 1.226e+04 1.074e+04 1.354e+04 5.077 + createTable 18.21 18.15 14.78 24.17 10.30 + dropDatabase 31.13 30.86 26.46 39.09 8.192 + dropDatabase.100 1436 1435 1165 1637 5.929 + dropDatabase.1000 1.376e+04 1.371e+04 1.272e+04 1.516e+04 3.864 + dropPartition 29.43 28.81 24.79 63.24 13.97 + dropPartitions.100 686.5 680.3 575.1 819.8 6.544 + dropPartitions.1000 6247 6166 5616 7535 6.435 + dropTable 27.53 27.34 23.23 35.35 9.241 + dropTableWithPartitions 36.41 36.19 31.33 50.41 8.310 + dropTableWithPartitions.100 793.3 792.0 687.9 987.4 7.293 + dropTableWithPartitions.1000 6981 6964 6336 9179 5.115 + getNid 0.6760 0.6512 0.4482 1.530 21.93 + getPartition 6.242 6.227 5.155 9.791 11.27 + getPartitionNames 4.888 4.660 3.842 13.12 22.53 + getPartitionNames.100 5.031 4.957 3.995 7.156 10.77 + getPartitionNames.1000 8.998 8.915 8.016 12.65 7.520 + getPartitions.100 9.717 9.475 7.883 13.08 9.835 + getPartitions.1000 32.60 32.03 28.30 50.02 9.036 + getPartitionsByNames 6.506 6.384 4.810 9.503 15.51 + getPartitionsByNames.100 9.312 9.025 7.955 18.44 14.46 + getPartitionsByNames.1000 38.47 37.49 34.57 62.51 10.23 + getTable 4.092 3.868 3.132 12.20 24.56 + listDatabases 0.6919 0.6835 0.5309 1.053 12.25 + listPartition 5.556 5.465 4.737 7.969 10.00 + listPartitions.100 9.087 8.874 7.630 12.13 10.86 + listPartitions.1000 33.79 32.55 28.63 46.15 11.14 + listTables 0.9851 0.9761 0.7948 1.378 12.07 + listTables.100 1.416 1.374 1.051 3.228 16.68 + listTables.1000 4.327 4.183 3.484 6.604 14.38 + renameTable 46.67 46.09 40.16 62.46 7.536 + renameTable.100 915.8 915.9 831.0 1022 3.833 + renameTable.1000 9015 8972 8073 1.137e+04 4.228 \ No newline at end of file diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml new file mode 100644 index 0000000000..079a07b33e --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml @@ -0,0 +1,164 @@ + + + + + hive-metastore-tools + org.apache.hive + 4.0.0-SNAPSHOT + + 4.0.0 + + jar + + hive-metastore-benchmarks + Hive metastore benchmarks + + + + org.apache.hive + metastore-tools-common + ${hive.version} + compile + + + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + + + + org.slf4j + slf4j-log4j12 + + + org.apache.logging.log4j + log4j-slf4j-impl + + + + org.jetbrains + annotations + + + info.picocli + picocli + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.platform + junit-platform-runner + test + + + + org.hamcrest + hamcrest-all + test + + + + + + + dist + + + + maven-assembly-plugin + + + + + + org.apache.hadoop.hive.metastore.tools.BenchmarkTool + true + + + + jar-with-dependencies + + hmsbench + + make-assembly-hclient + package + + single + 
+ + + + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + javac-with-errorprone + true + + + + + org.codehaus.plexus + plexus-compiler-javac-errorprone + ${javac.errorprone.version} + + + com.google.errorprone + error_prone_core + ${errorprone.core.version} + + + + + + + + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + + diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java new file mode 100644 index 0000000000..db620a2020 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools; + +/** + * Container for all data needed for running a benchmark. + */ +final class BenchData { + private HMSClient client; + final String dbName; + final String tableName; + + BenchData(String dbName, String tableName) { + this.dbName = dbName; + this.tableName = tableName; + } + + HMSClient getClient() { + return client; + } + + void setClient(HMSClient client) { + this.client = client; + } +} + diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java new file mode 100644 index 0000000000..041cd76234 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Formatter; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; + +import static org.apache.hadoop.hive.metastore.tools.Constants.HMS_DEFAULT_PORT; +import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.*; +import static org.apache.hadoop.hive.metastore.tools.Util.getServerUri; +import static picocli.CommandLine.Command; +import static picocli.CommandLine.Option; + +/** + * Command-line access to Hive Metastore. + */ +@SuppressWarnings( {"squid:S106", "squid:S1148"}) // Using System.out +@Command(name = "BenchmarkTool", + mixinStandardHelpOptions = true, version = "1.0", + showDefaultValues = true) + +public class BenchmarkTool implements Runnable { + private static final Logger LOG = LoggerFactory.getLogger(BenchmarkTool.class); + private static final TimeUnit scale = TimeUnit.MILLISECONDS; + private static final String CSV_SEPARATOR = "\t"; + private static final String TEST_TABLE = "bench_table"; + + + @Option(names = {"-H", "--host"}, description = "HMS Host", paramLabel = "URI") + private String host; + + @Option(names = {"-P", "--port"}, description = "HMS Server port") + private Integer port = HMS_DEFAULT_PORT; + + @Option(names = {"-d", "--db"}, description = "database name") + private String dbName = "bench_" + System.getProperty("user.name"); + + @Option(names = {"-t", "--table"}, description = "table name") + private String tableName = TEST_TABLE + "_" + System.getProperty("user.name"); + + + @Option(names = {"-N", "--number"}, description = "umber of object instances") + private int[] instances = {100}; + + @Option(names = {"-L", "--spin"}, description = "spin count") + private int spinCount = 100; + + @Option(names = {"-W", "--warmup"}, description = "warmup count") + private int warmup = 15; + + @Option(names = {"-l", "--list"}, description = "list matching benchmarks") + private boolean doList = false; + + @Option(names = {"-o", "--output"}, description = "output file") + private String outputFile; + + @Option(names = {"-T", "--threads"}, description = "number of concurrent threads") + private int nThreads = 2; + + @Option(names = {"--confdir"}, description = "configuration directory") + private String confDir; + + @Option(names = {"--sanitize"}, description = "sanitize results (remove outliers)") + private boolean doSanitize = false; + + @Option(names = {"-C", "--csv"}, description = "produce CSV output") + private boolean doCSV = false; + + @Option(names = {"--params"}, description = "number of table/partition parameters") + private int nParameters = 0; + + @Option(names = {"--savedata"}, description = "save raw data in specified dir") + private String dataSaveDir; + + @Option(names = {"--separator"}, description = "CSV field separator") + private String csvSeparator = CSV_SEPARATOR; + + @Option(names = {"-M", "--pattern"}, description = "test name patterns") + private Pattern[] matches; + + @Option(names = {"-E", "--exclude"}, description = "test name patterns to exclude") + private Pattern[] exclude; + + public static void main(String[] args) { + CommandLine.run(new 
BenchmarkTool(), args); + } + + static void saveData(Map result, String location, TimeUnit scale) throws IOException { + Path dir = Paths.get(location); + if (!dir.toFile().exists()) { + LOG.debug("creating directory {}", location); + Files.createDirectories(dir); + } else if (!dir.toFile().isDirectory()) { + LOG.error("{} should be a directory", location); + } + + // Create a new file for each benchmark and dump raw data to it. + result.forEach((name, data) -> saveDataFile(location, name, data, scale)); + } + + private static void saveDataFile(String location, String name, + DescriptiveStatistics data, TimeUnit scale) { + long conv = scale.toNanos(1); + Path dst = Paths.get(location, name); + try (PrintStream output = new PrintStream(dst.toString())) { + // Print all values one per line + Arrays.stream(data.getValues()).forEach(d -> output.println(d / conv)); + } catch (FileNotFoundException e) { + LOG.error("failed to write to {}", dst); + } + } + + + @Override + public void run() { + LOG.info("Using warmup " + warmup + + " spin " + spinCount + " nparams " + nParameters + " threads " + nThreads); + + StringBuilder sb = new StringBuilder(); + BenchData bData = new BenchData(dbName, tableName); + + MicroBenchmark bench = new MicroBenchmark(warmup, spinCount); + BenchmarkSuite suite = new BenchmarkSuite(); + + suite + .setScale(scale) + .doSanitize(doSanitize) + .add("getNid", () -> benchmarkGetNotificationId(bench, bData)) + .add("listDatabases", () -> benchmarkListDatabases(bench, bData)) + .add("listTables", () -> benchmarkListAllTables(bench, bData)) + .add("getTable", () -> benchmarkGetTable(bench, bData)) + .add("createTable", () -> benchmarkTableCreate(bench, bData)) + .add("dropTable", () -> benchmarkDeleteCreate(bench, bData)) + .add("dropTableWithPartitions", + () -> benchmarkDeleteWithPartitions(bench, bData, 1, nParameters)) + .add("addPartition", () -> benchmarkCreatePartition(bench, bData)) + .add("dropPartition", () -> benchmarkDropPartition(bench, bData)) + .add("listPartition", () -> benchmarkListPartition(bench, bData)) + .add("getPartition", + () -> benchmarkGetPartitions(bench, bData, 1)) + .add("getPartitionNames", + () -> benchmarkGetPartitionNames(bench, bData, 1)) + .add("getPartitionsByNames", + () -> benchmarkGetPartitionsByName(bench, bData, 1)) + .add("renameTable", + () -> benchmarkRenameTable(bench, bData, 1)) + .add("dropDatabase", + () -> benchmarkDropDatabase(bench, bData, 1)); + + for (int howMany: instances) { + suite.add("listTables" + '.' + howMany, + () -> benchmarkListTables(bench, bData, howMany)) + .add("dropTableWithPartitions" + '.' + howMany, + () -> benchmarkDeleteWithPartitions(bench, bData, howMany, nParameters)) + .add("listPartitions" + '.' + howMany, + () -> benchmarkListManyPartitions(bench, bData, howMany)) + .add("getPartitions" + '.' + howMany, + () -> benchmarkGetPartitions(bench, bData, howMany)) + .add("getPartitionNames" + '.' + howMany, + () -> benchmarkGetPartitionNames(bench, bData, howMany)) + .add("getPartitionsByNames" + '.' + howMany, + () -> benchmarkGetPartitionsByName(bench, bData, howMany)) + .add("addPartitions" + '.' + howMany, + () -> benchmarkCreatePartitions(bench, bData, howMany)) + .add("dropPartitions" + '.' + howMany, + () -> benchmarkDropPartitions(bench, bData, howMany)) + .add("renameTable" + '.' + howMany, + () -> benchmarkRenameTable(bench, bData, howMany)) + .add("dropDatabase" + '.' 
+ howMany, + () -> benchmarkDropDatabase(bench, bData, howMany)); + } + + if (doList) { + suite.listMatching(matches, exclude).forEach(System.out::println); + return; + } + + LOG.info("Using table '{}.{}", dbName, tableName); + + try (HMSClient client = new HMSClient(getServerUri(host, String.valueOf(port)), confDir)) { + bData.setClient(client); + if (!client.dbExists(dbName)) { + client.createDatabase(dbName); + } + + if (client.tableExists(dbName, tableName)) { + client.dropTable(dbName, tableName); + } + + // Arrange various benchmarks in a suite + BenchmarkSuite result = suite.runMatching(matches, exclude); + + Formatter fmt = new Formatter(sb); + if (doCSV) { + result.displayCSV(fmt, csvSeparator); + } else { + result.display(fmt); + } + + PrintStream output = System.out; + if (outputFile != null) { + output = new PrintStream(outputFile); + } + + if (outputFile != null) { + // Print results to stdout as well + StringBuilder s = new StringBuilder(); + Formatter f = new Formatter(s); + result.display(f); + System.out.print(s); + f.close(); + } + + output.print(sb.toString()); + fmt.close(); + + if (dataSaveDir != null) { + saveData(result.getResult(), dataSaveDir, scale); + } + } catch (Exception e) { + e.printStackTrace(); + } + } +} diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java new file mode 100644 index 0000000000..f53f2ef43b --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java @@ -0,0 +1,447 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.stream.IntStream; + +import static org.apache.hadoop.hive.metastore.tools.Util.addManyPartitions; +import static org.apache.hadoop.hive.metastore.tools.Util.addManyPartitionsNoException; +import static org.apache.hadoop.hive.metastore.tools.Util.createSchema; +import static org.apache.hadoop.hive.metastore.tools.Util.generatePartitionNames; +import static java.util.concurrent.Executors.newFixedThreadPool; +import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper; + +/** + * Actual benchmark code. + */ +final class HMSBenchmarks { + private static final Logger LOG = LoggerFactory.getLogger(HMSBenchmarks.class); + + private static final String PARAM_KEY = "parameter_"; + private static final String PARAM_VALUE = "value_"; + + static DescriptiveStatistics benchmarkListDatabases(@NotNull MicroBenchmark benchmark, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + return benchmark.measure(() -> + throwingSupplierWrapper(() -> client.getAllDatabases(null))); + } + + static DescriptiveStatistics benchmarkListAllTables(@NotNull MicroBenchmark benchmark, + @NotNull BenchData data) { + + final HMSClient client = data.getClient(); + String dbName = data.dbName; + + return benchmark.measure(() -> + throwingSupplierWrapper(() -> client.getAllTables(dbName, null))); + } + + static DescriptiveStatistics benchmarkTableCreate(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + + return bench.measure(null, + () -> throwingSupplierWrapper(() -> client.createTable(table)), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName))); + } + + static DescriptiveStatistics benchmarkDeleteCreate(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + + return bench.measure( + () -> throwingSupplierWrapper(() -> client.createTable(table)), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), + null); + } + + static DescriptiveStatistics benchmarkDeleteWithPartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int howMany, + int nparams) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + // Create many parameters + Map parameters = new HashMap<>(nparams); + for (int i = 0; i < nparams; i++) { + parameters.put(PARAM_KEY + i, PARAM_VALUE + i); + } + + return bench.measure( + () -> throwingSupplierWrapper(() -> 
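          // Per-iteration setup: build the partitioned table and its partitions up front,
          // so that only the dropTable() call below is what gets measured.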
{ + createPartitionedTable(client, dbName, tableName); + addManyPartitions(client, dbName, tableName, parameters, + Collections.singletonList("d"), howMany); + return true; + }), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), + null); + } + + static DescriptiveStatistics benchmarkGetTable(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getTable(dbName, tableName))); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListTables(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + + // Create a bunch of tables + String format = "tmp_table_%d"; + try { + createManyTables(client, count, dbName, format); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getAllTables(dbName, null))); + } finally { + dropManyTables(client, count, dbName, format); + } + } + + static DescriptiveStatistics benchmarkCreatePartition(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + final List values = Collections.singletonList("d1"); + try { + Table t = client.getTable(dbName, tableName); + Partition partition = new Util.PartitionBuilder(t) + .withValues(values) + .build(); + + return bench.measure(null, + () -> throwingSupplierWrapper(() -> client.addPartition(partition)), + () -> throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, values))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListPartition(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, null, + Collections.singletonList("d"), 1); + + return bench.measure(() -> + throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListManyPartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int howMany) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany); + LOG.debug("Created {} partitions", howMany); + LOG.debug("started benchmark... 
"); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int howMany) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany); + LOG.debug("Created {} partitions", howMany); + LOG.debug("started benchmark... "); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropPartition(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + final List values = Collections.singletonList("d1"); + try { + Table t = client.getTable(dbName, tableName); + Partition partition = new Util.PartitionBuilder(t) + .withValues(values) + .build(); + + return bench.measure( + () -> throwingSupplierWrapper(() -> client.addPartition(partition)), + () -> throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, values)), + null); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkCreatePartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure( + null, + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropPartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure( + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)), + null + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitionNames(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, 
null, + Collections.singletonList("d"), count); + return bench.measure( + () -> throwingSupplierWrapper(() -> client.getPartitionNames(dbName, tableName)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitionsByName(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + List partitionNames = throwingSupplierWrapper(() -> + client.getPartitionNames(dbName, tableName)); + return bench.measure( + () -> + throwingSupplierWrapper(() -> + client.getPartitionsByNames(dbName, tableName, partitionNames)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkRenameTable(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + Table oldTable = client.getTable(dbName, tableName); + oldTable.getSd().setLocation(""); + Table newTable = oldTable.deepCopy(); + newTable.setTableName(tableName + "_renamed"); + + return bench.measure( + () -> { + // Measuring 2 renames, so the tests are idempotent + throwingSupplierWrapper(() -> + client.alterTable(oldTable.getDbName(), oldTable.getTableName(), newTable)); + throwingSupplierWrapper(() -> + client.alterTable(newTable.getDbName(), newTable.getTableName(), oldTable)); + } + ); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropDatabase(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + + throwingSupplierWrapper(() -> client.dropDatabase(dbName)); + try { + return bench.measure( + () -> { + throwingSupplierWrapper(() -> client.createDatabase(dbName)); + createManyTables(client, count, dbName, "tmp_table_%d"); + }, + () -> throwingSupplierWrapper(() -> client.dropDatabase(dbName)), + null + ); + } finally { + throwingSupplierWrapper(() -> client.createDatabase(dbName)); + } + } + + private static void createManyTables(HMSClient client, int howMany, String dbName, String format) { + List columns = createSchema(new ArrayList<>(Arrays.asList("name", "string"))); + List partitions = createSchema(new ArrayList<>(Arrays.asList("date", "string"))); + IntStream.range(0, howMany) + .forEach(i -> + throwingSupplierWrapper(() -> client.createTable( + new Util.TableBuilder(dbName, String.format(format, i)) + .withType(TableType.MANAGED_TABLE) + .withColumns(columns) + .withPartitionKeys(partitions) + .build()))); + } + + private static void dropManyTables(HMSClient client, int howMany, String dbName, String format) { + IntStream.range(0, howMany) + .forEach(i -> + throwingSupplierWrapper(() -> client.dropTable(dbName, String.format(format, i)))); + } + + // Create a simple table with a single column and single partition + private 
static void createPartitionedTable(HMSClient client, String dbName, String tableName) { + throwingSupplierWrapper(() -> client.createTable( + new Util.TableBuilder(dbName, tableName) + .withType(TableType.MANAGED_TABLE) + .withColumns(createSchema(Collections.singletonList("name:string"))) + .withPartitionKeys(createSchema(Collections.singletonList("date"))) + .build())); + } + + static DescriptiveStatistics benchmarkGetNotificationId(@NotNull MicroBenchmark benchmark, + @NotNull BenchData data) { + HMSClient client = data.getClient(); + return benchmark.measure(() -> + throwingSupplierWrapper(client::getCurrentNotificationId)); + } + +} diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties new file mode 100644 index 0000000000..3abc88706f --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties @@ -0,0 +1,6 @@ +log4j.rootLogger=INFO, CA + +log4j.appender.CA=org.apache.log4j.ConsoleAppender + +log4j.appender.CA.layout=org.apache.log4j.PatternLayout +log4j.appender.CA.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml new file mode 100644 index 0000000000..dba13928f6 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml @@ -0,0 +1,33 @@ + + + +      + + + + + + + + + + + + + \ No newline at end of file diff --git a/standalone-metastore/metastore-tools/pom.xml b/standalone-metastore/metastore-tools/pom.xml new file mode 100644 index 0000000000..f6fb6dc95e --- /dev/null +++ b/standalone-metastore/metastore-tools/pom.xml @@ -0,0 +1,135 @@ + + + + + hive-standalone-metastore + org.apache.hive + 4.0.0-SNAPSHOT + + 4.0.0 + + hive-metastore-tools + Hive Metastore Tools + 4.0.0-SNAPSHOT + + pom + + + metastore-benchmarks + tools-common + + + + 4.0.0-SNAPSHOT + 2.20.1 + ${basedir}/checkstyle + UTF-8 + UTF-8 + 2.8 + 2.3.1 + 3.1.0 + 1.2.0 + 5.2.0 + 3.6.1 + 16.0.2 + + + + + + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + ${hive.version} + + + org.apache.hive + hive-common + ${hive.version} + + + org.apache.hive + hive-standalone-metastore-common + ${hive.version} + + + + org.apache.commons + commons-math3 + ${commons-math3.version} + + + + org.slf4j + slf4j-log4j12 + 1.7.25 + + + + org.jetbrains + annotations + ${jetbrain-annotation.version} + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + org.junit.jupiter + junit-jupiter-api + ${junit.jupiter.api.version} + test + + + + org.junit.platform + junit-platform-runner + ${junit.platform.runner.version} + + + junit + junit + ${junit.version} + + + info.picocli + picocli + ${picocli.version} + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.7.0 + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven.surefire.version} + + + + + + diff --git a/standalone-metastore/metastore-tools/tools-common/pom.xml b/standalone-metastore/metastore-tools/tools-common/pom.xml new file mode 100644 index 0000000000..6b03dd58b4 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/pom.xml @@ -0,0 +1,113 @@ + + + + + hive-metastore-tools + org.apache.hive + 4.0.0-SNAPSHOT + + 4.0.0 + + jar + + metastore-tools-common + Hive Metastore 
Tools common libraries + + + + org.apache.hive + hive-standalone-metastore-common + + + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + + + + org.jetbrains + annotations + compile + + + org.apache.hive + hive-common + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.hamcrest + hamcrest-all + test + + + junit + junit + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + javac-with-errorprone + true + + + + + org.codehaus.plexus + plexus-compiler-javac-errorprone + ${javac.errorprone.version} + + + com.google.errorprone + error_prone_core + ${errorprone.core.version} + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + + + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + + + diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java new file mode 100644 index 0000000000..5211082a7d --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.apache.commons.math3.stat.descriptive.rank.Median; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Formatter; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.regex.Pattern; + +import static org.apache.hadoop.hive.metastore.tools.Util.filterMatches; + +/** + * Group of benchmarks that can be joined together. + * Every benchmark has an associated name and code to run it. + * It is possible to run all benchmarks or only ones matching the filter.

 * <p>
 * Results can be optionally sanitized - any result that is outside of
 * mean +/- margin * delta is removed from the result set. This helps remove random
 * outliers.
 *
 * <p>Example:</p>
 *
 * <pre>
 *   StringBuilder sb = new StringBuilder();
 *   Formatter fmt = new Formatter(sb);
 *   BenchmarkSuite suite = new BenchmarkSuite();
 *   // Arrange various benchmarks in a suite
 *   BenchmarkSuite result = suite
 *       .setScale(scale)
 *       .doSanitize(true)
 *       .add("someBenchmark", someBenchmarkFunc)
 *       .add("anotherBenchmark", anotherBenchmarkFunc)
 *       .runMatching(patterns, exclude);
 *   result.display(fmt);
 * </pre>
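 *
 * <p>The same results can also be emitted as CSV - a minimal sketch, reusing
 * {@code suite}, {@code fmt} and the patterns from the example above:</p>
 *
 * <pre>
 *   suite.runMatching(patterns, exclude)
 *        .displayCSV(fmt, ",");
 * </pre>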
+ * + */ +public final class BenchmarkSuite { + private static final Logger LOG = LoggerFactory.getLogger(BenchmarkSuite.class); + // Delta margin for data sanitizing. When sanitizing is enabled, we filter out + // all result which are outside + // mean +/- MARGIN * stddev + private static final double MARGIN = 2; + // Collection of benchmarks + private final Map> suite = new HashMap<>(); + // List of benchmarks. All benchmarks are executed in the order + // they are inserted + private final List benchmarks = new ArrayList<>(); + // Once benchmarks are executed, results are stored in TreeMap to prserve the order. + private final Map result = new TreeMap<>(); + // Whether sanitizing of results is requested + private boolean doSanitize = false; + // Time units - we use milliseconds. + private TimeUnit scale = TimeUnit.MILLISECONDS; + + /** + * Set scaling factor for displaying results. + * When data is reported, all times are divided by scale functor. + * Data is always collected in nanoseconds, so this can be used to present + * data using different time units. + * @param scale: scaling factor + * @return this for chaining + */ + public BenchmarkSuite setScale(TimeUnit scale) { + this.scale = scale; + return this; + } + + /** + * Enable or disable result sanitization. + * This should be done before benchmarks are executed. + * @param sanitize enable sanitization if true, disable if false + * @return this object, allowing chained calls. + */ + public BenchmarkSuite doSanitize(boolean sanitize) { + this.doSanitize = sanitize; + return this; + } + + /** + * Get raw benchmark results + * @return map of benchmark name to the statistics describing the result + */ + public Map getResult() { + return result; + } + + /** + * Run all benchmarks in the 'names' list. + * @param names list of benchmarks to run + * @return this to allow chaining + */ + private BenchmarkSuite runAll(List names) { + if (doSanitize) { + names.forEach(name -> { + LOG.info("Running benchmark {}", name); + result.put(name, sanitize(suite.get(name).get())); + }); + } else { + names.forEach(name -> { + LOG.info("Running benchmark {}", name); + result.put(name, suite.get(name).get()); + }); + } + return this; + } + + /** + * Return list of benchmark names that match positive patterns and do not + * match negative patterns. + * @param positive regexp patterns that should match benchmark name + * @param negatve regexp patterns that should be excluded when matches + * @return list of benchmark names + */ + public List listMatching(@Nullable Pattern[] positive, + @Nullable Pattern[] negatve) { + return filterMatches(benchmarks, positive, negatve); + } + + /** + * Run all benchmarks (filtered by positive and negative matches. + * See {@link #listMatching(Pattern[], Pattern[])} for details. + * @param positive regexp patterns that should match benchmark name + * @param negatve regexp patterns that should be excluded when matches + * @return this + */ + public BenchmarkSuite runMatching(@Nullable Pattern[] positive, + @Nullable Pattern[] negatve) { + return runAll(filterMatches(benchmarks, positive, negatve)); + } + + /** + * Add new benchmark to the suite. 
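   * Benchmarks run in the order in which they are added; names are expected to
   * be unique, since each name keys both the benchmark map and the result map.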
+ * @param name benchmark name + * @param b benchmark corresponding to name + * @return this + */ + public BenchmarkSuite add(@NotNull String name, @NotNull Supplier b) { + suite.put(name, b); + benchmarks.add(name); + return this; + } + + /** + * Get new statistics that excludes values beyond mean +/- 2 * stdev + * + * @param data Source data + * @return new {@link @DescriptiveStatistics objects with sanitized data} + */ + private static DescriptiveStatistics sanitize(@NotNull DescriptiveStatistics data) { + double meanValue = data.getMean(); + double delta = MARGIN * meanValue; + double minVal = meanValue - delta; + double maxVal = meanValue + delta; + return new DescriptiveStatistics(Arrays.stream(data.getValues()) + .filter(x -> x > minVal && x < maxVal) + .toArray()); + } + + /** + * Get median value for given statistics. + * @param data collected datapoints. + * @return median value. + */ + private static double median(@NotNull DescriptiveStatistics data) { + return new Median().evaluate(data.getValues()); + } + + /** + * Produce printable result + * @param fmt text formatter - destination of formatted results. + * @param name benchmark name + * @param stats benchmark data + */ + private void displayStats(@NotNull Formatter fmt, @NotNull String name, + @NotNull DescriptiveStatistics stats) { + double mean = stats.getMean(); + double err = stats.getStandardDeviation() / mean * 100; + long conv = scale.toNanos(1); + + fmt.format("%-30s %-8.4g %-8.4g %-8.4g %-8.4g %-8.4g%n", + name, + mean / conv, + median(stats) / conv, + stats.getMin() / conv, + stats.getMax() / conv, + err); + } + + /** + * Produce results in printable CSV format, separated by separator. + * @param fmt text formatter - destination of formatted results. + * @param name benchmark name + * @param stats benchmark data + * @param separator field separator + */ + private void displayCSV(@NotNull Formatter fmt, @NotNull String name, + @NotNull DescriptiveStatistics stats, @NotNull String separator) { + double mean = stats.getMean(); + double err = stats.getStandardDeviation() / mean * 100; + long conv = scale.toNanos(1); + + fmt.format("%s%s%g%s%g%s%g%s%g%s%g%n", + name, separator, + mean / conv, separator, + median(stats) / conv, separator, + stats.getMin() / conv, separator, + stats.getMax() / conv, separator, + err); + } + + /** + * Format all results + * @param fmt text formatter - destination of formatted results. + * @return this + */ + BenchmarkSuite display(Formatter fmt) { + fmt.format("%-30s %-8s %-8s %-8s %-8s %-8s%n", + "Operation", "Mean", "Med", "Min", "Max", "Err%"); + result.forEach((name, stat) -> displayStats(fmt, name, stat)); + return this; + } + + /** + * Format all results in CSV format + * @param fmt text formatter - destination of formatted results. 
+ * @param separator field separator + * @return this + */ + BenchmarkSuite displayCSV(Formatter fmt, String separator) { + fmt.format("%s%s%s%s%s%s%s%s%s%s%s%n", + "Operation", separator, "Mean", separator, "Med", separator, "Min", + separator, "Max", separator, "Err%"); + result.forEach((name, s) -> displayCSV(fmt, name, s, separator)); + return this; + } +} diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java new file mode 100644 index 0000000000..5a584f6ade --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools; + +/** + * Common constants for metastore tools. + */ +public final class Constants { + static final String OPT_HOST = "host"; + static final String OPT_PORT = "port"; + static final String OPT_DATABASE = "database"; + static final String OPT_CONF = "conf"; + static final String OPT_VERBOSE = "verbose"; + static final int HMS_DEFAULT_PORT = 8093; + + // Disable object construction + private Constants() {} +} diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java new file mode 100644 index 0000000000..7cc1e42a8b --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java @@ -0,0 +1,428 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.DropPartitionsRequest; +import org.apache.hadoop.hive.metastore.api.DropPartitionsResult; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.RequestPartsSpec; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.SecurityUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.protocol.TCompactProtocol; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TFramedTransport; +import org.apache.thrift.transport.TSocket; +import org.apache.thrift.transport.TTransport; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.security.auth.login.LoginException; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + *  Wrapper for Thrift HMS interface. + */ +final class HMSClient implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(HMSClient.class); + private static final String METASTORE_URI = "hive.metastore.uris"; + private static final String CONFIG_DIR = "/etc/hive/conf"; + private static final String HIVE_SITE = "hive-site.xml"; + private static final String CORE_SITE = "core-site.xml"; + private static final String PRINCIPAL_KEY = "hive.metastore.kerberos.principal"; + + private final String confDir; + private ThriftHiveMetastore.Iface client; + private TTransport transport; + private URI serverURI; + + public URI getServerURI() { + return serverURI; + } + + @Override + public String toString() { + return serverURI.toString(); + } + + HMSClient(@Nullable URI uri) + throws TException, IOException, InterruptedException, LoginException, URISyntaxException { + this(uri, CONFIG_DIR); + } + + HMSClient(@Nullable URI uri, @Nullable String confDir) + throws TException, IOException, InterruptedException, LoginException, URISyntaxException { + this.confDir = confDir == null ? CONFIG_DIR : confDir; + getClient(uri); + } + + private void addResource(Configuration conf, @NotNull String r) throws MalformedURLException { + File f = new File(confDir + "/" + r); + if (f.exists() && !f.isDirectory()) { + LOG.debug("Adding configuration resource {}", r); + conf.addResource(f.toURI().toURL()); + } else { + LOG.debug("Configuration {} does not exist", r); + } + } + + /** + * Create a client to Hive Metastore. + * If principal is specified, create kerberised client. 
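   * The server URI is taken from the argument when it is given; otherwise the
   * first entry of the hive.metastore.uris list in the configuration is used.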
+ * + * @param uri server uri + * @throws MetaException if fails to login using kerberos credentials + * @throws IOException if fails connecting to metastore + * @throws InterruptedException if interrupted during kerberos setup + */ + private void getClient(@Nullable URI uri) + throws TException, IOException, InterruptedException, URISyntaxException, LoginException { + Configuration conf = new HiveConf(); + addResource(conf, HIVE_SITE); + if (uri != null) { + conf.set(METASTORE_URI, uri.toString()); + } + + // Pick up the first URI from the list of available URIs + serverURI = uri != null ? + uri : + new URI(conf.get(METASTORE_URI).split(",")[0]); + + String principal = conf.get(PRINCIPAL_KEY); + + if (principal == null) { + open(conf, serverURI); + return; + } + + LOG.debug("Opening kerberos connection to HMS"); + addResource(conf, CORE_SITE); + + Configuration hadoopConf = new Configuration(); + addResource(hadoopConf, HIVE_SITE); + addResource(hadoopConf, CORE_SITE); + + // Kerberos magic + UserGroupInformation.setConfiguration(hadoopConf); + UserGroupInformation.getLoginUser() + .doAs((PrivilegedExceptionAction) + () -> open(conf, serverURI)); + } + + boolean dbExists(@NotNull String dbName) throws TException { + return getAllDatabases(dbName).contains(dbName); + } + + boolean tableExists(@NotNull String dbName, @NotNull String tableName) throws TException { + return getAllTables(dbName, tableName).contains(tableName); + } + + Database getDatabase(@NotNull String dbName) throws TException { + return client.get_database(dbName); + } + + /** + * Return all databases with name matching the filter. + * + * @param filter Regexp. Can be null or empty in which case everything matches + * @return list of database names matching the filter + * @throws MetaException + */ + Set getAllDatabases(@Nullable String filter) throws TException { + if (filter == null || filter.isEmpty()) { + return new HashSet<>(client.get_all_databases()); + } + return client.get_all_databases() + .stream() + .filter(n -> n.matches(filter)) + .collect(Collectors.toSet()); + } + + Set getAllTables(@NotNull String dbName, @Nullable String filter) throws TException { + if (filter == null || filter.isEmpty()) { + return new HashSet<>(client.get_all_tables(dbName)); + } + return client.get_all_tables(dbName) + .stream() + .filter(n -> n.matches(filter)) + .collect(Collectors.toSet()); + } + + /** + * Create database with the given name if it doesn't exist + * + * @param name database name + */ + boolean createDatabase(@NotNull String name) throws TException { + return createDatabase(name, null, null, null); + } + + /** + * Create database if it doesn't exist + * + * @param name Database name + * @param description Database description + * @param location Database location + * @param params Database params + * @throws TException if database exists + */ + boolean createDatabase(@NotNull String name, + @Nullable String description, + @Nullable String location, + @Nullable Map params) + throws TException { + Database db = new Database(name, description, location, params); + client.create_database(db); + return true; + } + + boolean createDatabase(Database db) throws TException { + client.create_database(db); + return true; + } + + boolean dropDatabase(@NotNull String dbName) throws TException { + client.drop_database(dbName, true, true); + return true; + } + + boolean createTable(Table table) throws TException { + client.create_table(table); + return true; + } + + boolean dropTable(@NotNull String dbName, @NotNull String 
tableName) throws TException { + client.drop_table(dbName, tableName, true); + return true; + } + + Table getTable(@NotNull String dbName, @NotNull String tableName) throws TException { + return client.get_table(dbName, tableName); + } + + Partition createPartition(@NotNull Table table, @NotNull List values) throws TException { + return client.add_partition(new Util.PartitionBuilder(table).withValues(values).build()); + } + + Partition addPartition(@NotNull Partition partition) throws TException { + return client.add_partition(partition); + } + + void addPartitions(List partitions) throws TException { + client.add_partitions(partitions); + } + + + List listPartitions(@NotNull String dbName, + @NotNull String tableName) throws TException { + return client.get_partitions(dbName, tableName, (short) -1); + } + + Long getCurrentNotificationId() throws TException { + return client.get_current_notificationEventId().getEventId(); + } + + List getPartitionNames(@NotNull String dbName, + @NotNull String tableName) throws TException { + return client.get_partition_names(dbName, tableName, (short) -1); + } + + public boolean dropPartition(@NotNull String dbName, @NotNull String tableName, + @NotNull List arguments) + throws TException { + return client.drop_partition(dbName, tableName, arguments, true); + } + + List getPartitions(@NotNull String dbName, @NotNull String tableName) throws TException { + return client.get_partitions(dbName, tableName, (short) -1); + } + + DropPartitionsResult dropPartitions(@NotNull String dbName, @NotNull String tableName, + @Nullable List partNames) throws TException { + if (partNames == null) { + return dropPartitions(dbName, tableName, getPartitionNames(dbName, tableName)); + } + if (partNames.isEmpty()) { + return null; + } + return client.drop_partitions_req(new DropPartitionsRequest(dbName, + tableName, RequestPartsSpec.names(partNames))); + } + + List getPartitionsByNames(@NotNull String dbName, @NotNull String tableName, + @Nullable List names) throws TException { + if (names == null) { + return client.get_partitions_by_names(dbName, tableName, + getPartitionNames(dbName, tableName)); + } + return client.get_partitions_by_names(dbName, tableName, names); + } + + boolean alterTable(@NotNull String dbName, @NotNull String tableName, @NotNull Table newTable) + throws TException { + client.alter_table(dbName, tableName, newTable); + return true; + } + + void alterPartition(@NotNull String dbName, @NotNull String tableName, + @NotNull Partition partition) throws TException { + client.alter_partition(dbName, tableName, partition); + } + + void alterPartitions(@NotNull String dbName, @NotNull String tableName, + @NotNull List partitions) throws TException { + client.alter_partitions(dbName, tableName, partitions); + } + + void appendPartition(@NotNull String dbName, @NotNull String tableName, + @NotNull List partitionValues) throws TException { + client.append_partition_with_environment_context(dbName, tableName, partitionValues, null); + } + + private TTransport open(Configuration conf, @NotNull URI uri) throws + TException, IOException, LoginException { + boolean useSSL = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_SSL); + boolean useSasl = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_SASL); + boolean useFramedTransport = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_FRAMED_TRANSPORT); + boolean useCompactProtocol = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_COMPACT_PROTOCOL); + int 
clientSocketTimeout = (int) MetastoreConf.getTimeVar(conf, + MetastoreConf.ConfVars.CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS); + + LOG.debug("Connecting to {}, framedTransport = {}", uri, useFramedTransport); + + String host = uri.getHost(); + int port = uri.getPort(); + + // Sasl/SSL code is copied from HiveMetastoreCLient + if (!useSSL) { + transport = new TSocket(host, port, clientSocketTimeout); + } else { + String trustStorePath = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.SSL_TRUSTSTORE_PATH).trim(); + if (trustStorePath.isEmpty()) { + throw new IllegalArgumentException(MetastoreConf.ConfVars.SSL_TRUSTSTORE_PATH.toString() + + " Not configured for SSL connection"); + } + String trustStorePassword = + MetastoreConf.getPassword(conf, MetastoreConf.ConfVars.SSL_TRUSTSTORE_PASSWORD); + + // Create an SSL socket and connect + transport = SecurityUtils.getSSLSocket(host, port, clientSocketTimeout, + trustStorePath, trustStorePassword); + LOG.info("Opened an SSL connection to metastore, current connections"); + } + + if (useSasl) { + // Wrap thrift connection with SASL for secure connection. + HadoopThriftAuthBridge.Client authBridge = + HadoopThriftAuthBridge.getBridge().createClient(); + + // check if we should use delegation tokens to authenticate + // the call below gets hold of the tokens if they are set up by hadoop + // this should happen on the map/reduce tasks if the client added the + // tokens into hadoop's credential store in the front end during job + // submission. + String tokenSig = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.TOKEN_SIGNATURE); + // tokenSig could be null + String tokenStrForm = SecurityUtils.getTokenStrForm(tokenSig); + + if (tokenStrForm != null) { + LOG.info("HMSC::open(): Found delegation token. Creating DIGEST-based thrift connection."); + // authenticate using delegation tokens via the "DIGEST" mechanism + transport = authBridge.createClientTransport(null, host, + "DIGEST", tokenStrForm, transport, + MetaStoreUtils.getMetaStoreSaslProperties(conf, useSSL)); + } else { + LOG.info("HMSC::open(): Could not find delegation token. Creating KERBEROS-based thrift connection."); + String principalConfig = + MetastoreConf.getVar(conf, MetastoreConf.ConfVars.KERBEROS_PRINCIPAL); + transport = authBridge.createClientTransport( + principalConfig, host, "KERBEROS", null, + transport, MetaStoreUtils.getMetaStoreSaslProperties(conf, useSSL)); + } + } else { + if (useFramedTransport) { + transport = new TFramedTransport(transport); + } + } + + final TProtocol protocol; + if (useCompactProtocol) { + protocol = new TCompactProtocol(transport); + } else { + protocol = new TBinaryProtocol(transport); + } + client = new ThriftHiveMetastore.Client(protocol); + if (!transport.isOpen()) { + transport.open(); + LOG.info("Opened a connection to metastore, current connections"); + + if (!useSasl && MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.EXECUTE_SET_UGI)) { + // Call set_ugi, only in unsecure mode. + try { + UserGroupInformation ugi = SecurityUtils.getUGI(); + client.set_ugi(ugi.getUserName(), Arrays.asList(ugi.getGroupNames())); + } catch (LoginException e) { + LOG.warn("Failed to do login. set_ugi() is not successful, " + + "Continuing without it.", e); + } catch (IOException e) { + LOG.warn("Failed to find ugi of client set_ugi() is not successful, " + + "Continuing without it.", e); + } catch (TException e) { + LOG.warn("set_ugi() not successful, Likely cause: new client talking to old server. 
" + + "Continuing without it.", e); + } + } + } + + LOG.debug("Connected to metastore, using compact protocol = {}", useCompactProtocol); + return transport; + } + + @Override + public void close() throws Exception { + if (transport != null && transport.isOpen()) { + LOG.debug("Closing thrift transport"); + transport.close(); + } + } +} diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java new file mode 100644 index 0000000000..9da7306c60 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** + * Micro-benchmark some piece of code.

+ *
+ * <p>Every benchmark has three parts:
+ * <ul>
+ *   <li>an optional pre-test setup</li>
+ *   <li>the mandatory test itself</li>
+ *   <li>an optional post-test cleanup</li>
+ * </ul>
+ * Measurement consists of a warm-up phase followed by a measurement phase.
+ * The caller can specify how many times the warm-up and the measurement are repeated.
+ * All time is measured in nanoseconds. + */ +class MicroBenchmark { + // Specify defaults + private static final int WARMUP_DEFAULT = 15; + private static final int ITERATIONS_DEFAULT = 100; + private static final int SCALE_DEFAULT = 1; + + private final int warmup; + private final int iterations; + private final int scaleFactor; + + /** + * Create default micro benchmark measurer + */ + public MicroBenchmark() { + this(WARMUP_DEFAULT, ITERATIONS_DEFAULT, SCALE_DEFAULT); + } + + /** + * Create micro benchmark measurer. + * @param warmup number of test calls for warmup + * @param iterations number of test calls for measurement + */ + MicroBenchmark(int warmup, int iterations) { + this(warmup, iterations, SCALE_DEFAULT); + } + + /** + * Create micro benchmark measurer. + * + * @param warmup number of test calls for warmup + * @param iterations number of test calls for measurement + * @param scaleFactor Every delta is divided by scale factor + */ + private MicroBenchmark(int warmup, int iterations, int scaleFactor) { + this.warmup = warmup; + this.iterations = iterations; + this.scaleFactor = scaleFactor; + } + + /** + * Run the benchmark and measure run-time statistics in nanoseconds.

+ * Before the run the warm-up phase is executed. + * @param pre Optional pre-test setup + * @param test Mandatory test + * @param post Optional post-test cleanup + * @return Statistics describing the results. All times are in nanoseconds. + */ + public DescriptiveStatistics measure(@Nullable Runnable pre, + @NotNull Runnable test, + @Nullable Runnable post) { + // Warmup phase + for (int i = 0; i < warmup; i++) { + if (pre != null) { + pre.run(); + } + test.run(); + if (post != null) { + post.run(); + } + } + // Run the benchmark + DescriptiveStatistics stats = new DescriptiveStatistics(); + for (int i = 0; i < iterations; i++) { + if (pre != null) { + pre.run(); + } + long start = System.nanoTime(); + test.run(); + long end = System.nanoTime(); + stats.addValue((double)(end - start) / scaleFactor); + if (post != null) { + post.run(); + } + } + return stats; + } + + /** + * Run the benchmark and measure run-time statistics in nanoseconds.

+ * Before the run the warm-up phase is executed. No pre or post operations are executed. + * @param test test to measure + * @return Statistics describing the results. All times are in nanoseconds. + */ + public DescriptiveStatistics measure(@NotNull Runnable test) { + return measure(null, test, null); + } +} diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java new file mode 100644 index 0000000000..101d6759c5 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java @@ -0,0 +1,554 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import com.google.common.base.Joiner; +import com.google.common.net.HostAndPort; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * Helper utilities. The Util class is just a placeholder for static methods, + * it should be never instantiated. 
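+ *
+ * <p>A short sketch of {@code throwingSupplierWrapper}, which shifts checked
+ * exceptions into RuntimeException (the {@code client} variable is assumed to
+ * exist in the caller):
+ * <pre>{@code
+ *   Set<String> dbs = Util.throwingSupplierWrapper(() -> client.getAllDatabases(null));
+ * }</pre>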
+ */ +public final class Util { + private static final String DEFAULT_TYPE = "string"; + private static final String TYPE_SEPARATOR = ":"; + private static final String THRIFT_SCHEMA = "thrift"; + static final String DEFAULT_HOST = "localhost"; + private static final String ENV_SERVER = "HMS_HOST"; + private static final String ENV_PORT = "HMS_PORT"; + private static final String PROP_HOST = "hms.host"; + private static final String PROP_PORT = "hms.port"; + + private static final String HIVE_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.HiveInputFormat"; + private static final String HIVE_OUTPUT_FORMAT = "org.apache.hadoop.hive.ql.io.HiveOutputFormat"; + private static final String LAZY_SIMPLE_SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; + + private static final Pattern[] EMPTY_PATTERN = new Pattern[]{}; + private static final Pattern[] MATCH_ALL_PATTERN = new Pattern[]{Pattern.compile(".*")}; + + private static final Logger LOG = LoggerFactory.getLogger(Util.class); + + // Disable public constructor + private Util() { + } + + /** + * Wrapper that moves all checked exceptions to RuntimeException. + * + * @param throwingSupplier Supplier that throws Exception + * @param Supplier return type + * @return Supplier that throws unchecked exception + */ + public static T throwingSupplierWrapper(ThrowingSupplier throwingSupplier) { + try { + return throwingSupplier.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Version of the Supplier that can throw exceptions. + * + * @param Supplier return type + * @param Exception type + */ + @FunctionalInterface + public interface ThrowingSupplier { + T get() throws E; + } + + /** + * A builder for Database. The name of the new database is required. Everything else + * selects reasonable defaults. + * This is a modified version of Hive 3.0 DatabaseBuilder. + */ + public static class DatabaseBuilder { + private String name; + private String description; + private String location; + private String ownerName; + private PrincipalType ownerType; + private Map params = null; + + // Disable default constructor + private DatabaseBuilder() { + } + + /** + * Constructor from database name. + * + * @param name Database name + */ + public DatabaseBuilder(@NotNull String name) { + this.name = name; + ownerType = PrincipalType.USER; + } + + /** + * Add database description. + * + * @param description Database description string. + * @return this + */ + public DatabaseBuilder withDescription(@NotNull String description) { + this.description = description; + return this; + } + + /** + * Add database location + * + * @param location Database location string + * @return this + */ + public DatabaseBuilder withLocation(@NotNull String location) { + this.location = location; + return this; + } + + /** + * Add Database parameters + * + * @param params database parameters + * @return this + */ + public DatabaseBuilder withParams(@NotNull Map params) { + this.params = params; + return this; + } + + /** + * Add a single database parameter. 
+ * + * @param key parameter key + * @param val parameter value + * @return this + */ + public DatabaseBuilder withParam(@NotNull String key, @NotNull String val) { + if (this.params == null) { + this.params = new HashMap<>(); + } + this.params.put(key, val); + return this; + } + + /** + * Add database owner name + * + * @param ownerName new owner name + * @return this + */ + public DatabaseBuilder withOwnerName(@NotNull String ownerName) { + this.ownerName = ownerName; + return this; + } + + /** + * Add owner tyoe + * + * @param ownerType database owner type (USER or GROUP) + * @return this + */ + public DatabaseBuilder withOwnerType(PrincipalType ownerType) { + this.ownerType = ownerType; + return this; + } + + /** + * Build database object + * + * @return database + */ + public Database build() { + Database db = new Database(name, description, location, params); + if (ownerName != null) { + db.setOwnerName(ownerName); + } + if (ownerType != null) { + db.setOwnerType(ownerType); + } + return db; + } + } + + /** + * Builder for Table. + */ + public static class TableBuilder { + private final String dbName; + private final String tableName; + private TableType tableType = TableType.MANAGED_TABLE; + private String location; + private String serde = LAZY_SIMPLE_SERDE; + private String owner; + private List columns; + private List partitionKeys; + private String inputFormat = HIVE_INPUT_FORMAT; + private String outputFormat = HIVE_OUTPUT_FORMAT; + private Map parameters = new HashMap<>(); + + private TableBuilder() { + dbName = null; + tableName = null; + } + + TableBuilder(String dbName, String tableName) { + this.dbName = dbName; + this.tableName = tableName; + } + + static Table buildDefaultTable(String dbName, String tableName) { + return new TableBuilder(dbName, tableName).build(); + } + + TableBuilder withType(TableType tabeType) { + this.tableType = tabeType; + return this; + } + + TableBuilder withOwner(String owner) { + this.owner = owner; + return this; + } + + TableBuilder withColumns(List columns) { + this.columns = columns; + return this; + } + + TableBuilder withPartitionKeys(List partitionKeys) { + this.partitionKeys = partitionKeys; + return this; + } + + TableBuilder withSerde(String serde) { + this.serde = serde; + return this; + } + + TableBuilder withInputFormat(String inputFormat) { + this.inputFormat = inputFormat; + return this; + } + + TableBuilder withOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; + return this; + } + + TableBuilder withParameter(String name, String value) { + parameters.put(name, value); + return this; + } + + TableBuilder withLocation(String location) { + this.location = location; + return this; + } + + Table build() { + StorageDescriptor sd = new StorageDescriptor(); + if (columns == null) { + sd.setCols(Collections.emptyList()); + } else { + sd.setCols(columns); + } + SerDeInfo serdeInfo = new SerDeInfo(); + serdeInfo.setSerializationLib(serde); + serdeInfo.setName(tableName); + sd.setSerdeInfo(serdeInfo); + sd.setInputFormat(inputFormat); + sd.setOutputFormat(outputFormat); + if (location != null) { + sd.setLocation(location); + } + + Table table = new Table(); + table.setDbName(dbName); + table.setTableName(tableName); + table.setSd(sd); + table.setParameters(parameters); + table.setOwner(owner); + if (partitionKeys != null) { + table.setPartitionKeys(partitionKeys); + } + table.setTableType(tableType.toString()); + return table; + } + } + + /** + * Builder of partitions. 
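+ *
+ * <p>A minimal sketch, assuming {@code table} already exists and has a single
+ * partition key:
+ * <pre>{@code
+ *   Partition p = new Util.PartitionBuilder(table)
+ *       .withValues(Collections.singletonList("d1"))
+ *       .build();
+ * }</pre>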
+ */ + public static class PartitionBuilder { + private final Table table; + private List values; + private String location; + private Map parameters = new HashMap<>(); + + private PartitionBuilder() { + table = null; + } + + PartitionBuilder(Table table) { + this.table = table; + } + + PartitionBuilder withValues(List values) { + this.values = new ArrayList<>(values); + return this; + } + + PartitionBuilder withLocation(String location) { + this.location = location; + return this; + } + + PartitionBuilder withParameter(String name, String value) { + parameters.put(name, value); + return this; + } + + PartitionBuilder withParameters(Map params) { + parameters = params; + return this; + } + + Partition build() { + Partition partition = new Partition(); + List partitionNames = table.getPartitionKeys() + .stream() + .map(FieldSchema::getName) + .collect(Collectors.toList()); + if (partitionNames.size() != values.size()) { + throw new RuntimeException("Partition values do not match table schema"); + } + List spec = IntStream.range(0, values.size()) + .mapToObj(i -> partitionNames.get(i) + "=" + values.get(i)) + .collect(Collectors.toList()); + + partition.setDbName(table.getDbName()); + partition.setTableName(table.getTableName()); + partition.setParameters(parameters); + partition.setValues(values); + partition.setSd(table.getSd().deepCopy()); + if (this.location == null) { + partition.getSd().setLocation(table.getSd().getLocation() + "/" + Joiner.on("/").join(spec)); + } else { + partition.getSd().setLocation(location); + } + return partition; + } + } + + /** + * Create table schema from parameters. + * + * @param params list of parameters. Each parameter can be either a simple name or + * name:type for non-String types. + * @return table schema description + */ + public static List createSchema(@Nullable List params) { + if (params == null || params.isEmpty()) { + return Collections.emptyList(); + } + + return params.stream() + .map(Util::param2Schema) + .collect(Collectors.toList()); + } + + /** + * Get server URI.

+ * The HMS host is obtained from:
+ * <ol>
+ *   <li>the host argument</li>
+ *   <li>the HMS_HOST environment variable</li>
+ *   <li>the hms.host Java property</li>
+ *   <li>'localhost' if all of the above are unset</li>
+ * </ol>
+ * The HMS port is obtained from:
+ * <ol>
+ *   <li>the port argument</li>
+ *   <li>the host:port string</li>
+ *   <li>the HMS_PORT environment variable</li>
+ *   <li>the hms.port Java property</li>
+ *   <li>the default port value</li>
+ * </ol>
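+ *
+ * <p>For example, assuming HMS_HOST and HMS_PORT are not set in the
+ * environment or system properties, {@code getServerUri(null, null)}
+ * resolves to {@code thrift://localhost:9083}.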
+ * + * @param host HMS host string. + * @param portString HMS port + * @return HMS URI + * @throws URISyntaxException if URI is is invalid + */ + public static @Nullable URI getServerUri(@Nullable String host, @Nullable String portString) throws + URISyntaxException { + if (host == null) { + host = System.getenv(ENV_SERVER); + } + if (host == null) { + host = System.getProperty(PROP_HOST); + } + if (host == null) { + host = DEFAULT_HOST; + } + host = host.trim(); + + if ((portString == null || portString.isEmpty() || portString.equals("0")) && + !host.contains(":")) { + portString = System.getenv(ENV_PORT); + if (portString == null) { + portString = System.getProperty(PROP_PORT); + } + } + Integer port = Constants.HMS_DEFAULT_PORT; + if (portString != null) { + port = Integer.parseInt(portString); + } + + HostAndPort hp = HostAndPort.fromString(host) + .withDefaultPort(port); + + LOG.info("Connecting to {}:{}", hp.getHostText(), hp.getPort()); + + return new URI(THRIFT_SCHEMA, null, hp.getHostText(), hp.getPort(), + null, null, null); + } + + + private static FieldSchema param2Schema(@NotNull String param) { + String colType = DEFAULT_TYPE; + String name = param; + if (param.contains(TYPE_SEPARATOR)) { + String[] parts = param.split(TYPE_SEPARATOR); + name = parts[0]; + colType = parts[1].toLowerCase(); + } + return new FieldSchema(name, colType, ""); + } + + /** + * Create multiple partition objects. + * + * @param table + * @param arguments - list of partition names. + * @param npartitions - Partition parameters + * @return List of created partitions + */ + static List createManyPartitions(@NotNull Table table, + @Nullable Map parameters, + @NotNull List arguments, + int npartitions) { + return IntStream.range(0, npartitions) + .mapToObj(i -> + new PartitionBuilder(table) + .withParameters(parameters) + .withValues( + arguments.stream() + .map(a -> a + i) + .collect(Collectors.toList())).build()) + .collect(Collectors.toList()); + } + + /** + * Add many partitions in one HMS call + * + * @param client HMS Client + * @param dbName database name + * @param tableName table name + * @param arguments list of partition names + * @param npartitions number of partitions to create + * @throws TException if fails to create partitions + */ + static Object addManyPartitions(@NotNull HMSClient client, + @NotNull String dbName, + @NotNull String tableName, + @Nullable Map parameters, + @NotNull List arguments, + int npartitions) throws TException { + Table table = client.getTable(dbName, tableName); + client.addPartitions(createManyPartitions(table, parameters, arguments, npartitions)); + return null; + } + + static List generatePartitionNames(@NotNull String prefix, int npartitions) { + return IntStream.range(0, npartitions).mapToObj(i -> prefix + i).collect(Collectors.toList()); + } + + static void addManyPartitionsNoException(@NotNull HMSClient client, + @NotNull String dbName, + @NotNull String tableName, + @Nullable Map parameters, + List arguments, + int npartitions) { + throwingSupplierWrapper(() -> + addManyPartitions(client, dbName, tableName, parameters, arguments, npartitions)); + } + + /** + * Filter candidates - find all that match positive matches and do not match + * any negative matches. + * + * @param candidates list of candidate strings. If null, return an empty list. + * @param positivePatterns list of regexp that should all match. If null, everything matches. + * @param negativePatterns list of regexp, none of these should match. If null, everything matches. 
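+ *                         For example, candidates ["foo", "bar"] with positive
+ *                         pattern "f.*" and no negative patterns yield ["foo"].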
+ * @return list of filtered results. + */ + public static List filterMatches(@Nullable List candidates, + @Nullable Pattern[] positivePatterns, + @Nullable Pattern[] negativePatterns) { + if (candidates == null || candidates.isEmpty()) { + return Collections.emptyList(); + } + final Pattern[] positive = (positivePatterns == null || positivePatterns.length == 0) ? + MATCH_ALL_PATTERN : positivePatterns; + final Pattern[] negative = negativePatterns == null ? EMPTY_PATTERN : negativePatterns; + + return candidates.stream() + .filter(c -> Arrays.stream(positive).anyMatch(p -> p.matcher(c).matches())) + .filter(c -> Arrays.stream(negative).noneMatch(p -> p.matcher(c).matches())) + .collect(Collectors.toList()); + } +} diff --git a/standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java b/standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java new file mode 100644 index 0000000000..ab4b62543f --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.tools; + +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.tools.HMSClient; +import org.apache.hadoop.hive.metastore.tools.Util; +import org.apache.thrift.TException; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; + +import java.util.Set; + +import static org.apache.hadoop.hive.metastore.tools.Util.getServerUri; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.equalToIgnoringCase; +import static org.hamcrest.Matchers.hasItem; +import static org.junit.Assume.assumeTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class HMSClientTest { + private static final String PARAM_NAME = "param"; + private static final String VALUE_NAME = "value"; + private static final String TEST_DATABASE="hmsClientTest"; + private static final String TEST_DATABASE_DESCRIPTION="hmsclienttest description"; + private static final ImmutableMap TEST_DATABASE_PARAMS = + new ImmutableMap.Builder() + .put(PARAM_NAME, VALUE_NAME) + .build(); + private static boolean hasClient = false; + + private static final String TEST_TABLE_NAME="test1"; + private static final Table TEST_TABLE = + Util.TableBuilder.buildDefaultTable(TEST_DATABASE, TEST_TABLE_NAME); + + private static HMSClient client = null; + + @BeforeAll + static void init() throws Exception { + Database db = new Util.DatabaseBuilder(TEST_DATABASE) + .withDescription(TEST_DATABASE_DESCRIPTION) + .withParams(TEST_DATABASE_PARAMS) + .build(); + // Create client and default test database + try { + client = + new HMSClient(getServerUri(null, null), null); + client.createDatabase(db); + } catch (Exception e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } + } + + @AfterAll + static void shutdown() throws TException { + if (client != null) { + // Destroy test database + client.dropDatabase(TEST_DATABASE); + } + } + + @Before + public void beforeTest() { + Assume.assumeTrue(client != null); + } + + /** + * Verify that list of databases contains "default" and test database + * @throws Exception + */ + @Test + public void getAllDatabases() throws Exception { + Set databases = client.getAllDatabases(null); + MatcherAssert.assertThat(databases, Matchers.hasItem("default")); + MatcherAssert.assertThat(databases, Matchers.hasItem(TEST_DATABASE.toLowerCase())); + assertThat(client.getAllDatabases(TEST_DATABASE.toLowerCase()), Matchers.contains(TEST_DATABASE.toLowerCase())); + } + + /** + * Verify that an attempt to create an existing database throws AlreadyExistsException. 
+ */ + @Test + public void createExistingDatabase() { + Throwable exception = Assertions.assertThrows(AlreadyExistsException.class, + () -> client.createDatabase(TEST_DATABASE)); + } + + /** + * Creating a database with null name should not be allowed + * and should throw MetaException. + */ + @Test + public void createDatabaseNullName() { + Database db = new Util.DatabaseBuilder(TEST_DATABASE) + .build(); + db.setName(null); + Throwable exception = Assertions.assertThrows(MetaException.class, + () -> client.createDatabase(db)); + } + + /** + * Creating a database with an empty name should not be allowed + * and should throw InvalidObjectException + */ + @Test + public void createDatabaseEmptyName() { + Assume.assumeTrue(client != null); + Database db = new Util.DatabaseBuilder(TEST_DATABASE) + .build(); + db.setName(""); + Throwable exception = Assertions.assertThrows(InvalidObjectException.class, + () -> client.createDatabase(db)); + } + + /** + * Verify that getDatabase() returns all expected fields + * @throws TException if fails to get database info + */ + @Test + public void getDatabase() throws TException { + Database db = client.getDatabase(TEST_DATABASE); + MatcherAssert.assertThat(db.getName(), Matchers.equalToIgnoringCase(TEST_DATABASE)); + MatcherAssert.assertThat(db.getDescription(), Matchers.equalTo(TEST_DATABASE_DESCRIPTION)); + MatcherAssert.assertThat(db.getParameters(), Matchers.equalTo(TEST_DATABASE_PARAMS)); + MatcherAssert.assertThat(db.getLocationUri(), Matchers.containsString(TEST_DATABASE.toLowerCase())); + } + + /** + * Verify that locating database is case-insensitive + */ + @Test + public void getDatabaseCI() throws TException { + Database db = client.getDatabase(TEST_DATABASE.toUpperCase()); + MatcherAssert.assertThat(db.getName(), Matchers.equalToIgnoringCase(TEST_DATABASE)); + MatcherAssert.assertThat(db.getDescription(), Matchers.equalTo(TEST_DATABASE_DESCRIPTION)); + MatcherAssert.assertThat(db.getParameters(), Matchers.equalTo(TEST_DATABASE_PARAMS)); + MatcherAssert.assertThat(db.getLocationUri(), Matchers.containsString(TEST_DATABASE.toLowerCase())); + } + + /** + * Verify that searching for non-existing database throws + * NoSuchObjectException + */ + @Test + public void getNonExistingDb() { + Throwable exception = Assertions.assertThrows(NoSuchObjectException.class, + () -> client.getDatabase("WhatIsThisDatabase")); + } + + + /** + * Verify that dropping for non-existing database throws + * NoSuchObjectException + */ + @Test + public void dropNonExistingDb() { + Throwable exception = Assertions.assertThrows(NoSuchObjectException.class, + () -> client.dropDatabase("WhatIsThisDatabase")); + } + + @Test + public void getAllTables() throws TException { + try { + client.createTable(TEST_TABLE); + assertThat(client.getAllTables(TEST_DATABASE, null), Matchers.contains(TEST_TABLE_NAME)); + } catch (Exception e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } finally { + client.dropTable(TEST_DATABASE, TEST_TABLE_NAME); + } + } + +} \ No newline at end of file diff --git a/standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java b/standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java new file mode 100644 index 0000000000..202979dec2 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.tools;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.Test;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import static org.apache.hadoop.hive.metastore.tools.Util.filterMatches;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+
+public class UtilTest {
+
+  public UtilTest() {
+  }
+
+  /**
+   * Test that a null pattern returns all candidates for filterMatches.
+   * Also verify that null candidates result in an empty result list.
+   */
+  @Test
+  public void filterMatchesEmpty() {
+    List<String> candidates = ImmutableList.of("foo", "bar");
+    assertThat(filterMatches(candidates, null, null), is(candidates));
+    assertThat(filterMatches(null, null, null), is(Collections.emptyList()));
+  }
+
+  /**
+   * Test positive matches when some candidates match.
+   */
+  @Test
+  public void filterMatchesPositive() {
+    List<String> candidates = ImmutableList.of("foo", "bar");
+    List<String> expected = ImmutableList.of("foo");
+    assertThat(filterMatches(candidates, new Pattern[]{Pattern.compile("f.*")}, null),
+        is(expected));
+  }
+
+  /**
+   * Test negative matches.
+   */
+  @Test
+  public void filterMatchesNegative() {
+    List<String> candidates = ImmutableList.of("a", "b");
+    List<String> expected = ImmutableList.of("a");
+    assertThat(filterMatches(candidates, null, new Pattern[]{Pattern.compile("b")}),
+        is(expected));
+  }
+
+  /**
+   * Test that multiple patterns are handled correctly. We use one positive and
+   * one negative pattern.
+   */
+  @Test
+  public void filterMatchesMultiple() {
+    List<String> candidates = ImmutableList.of("a", "b", "any", "boom", "hello");
+    List<String> expected = ImmutableList.of("a");
+    assertThat(filterMatches(candidates, new Pattern[]{Pattern.compile("^a")}, new Pattern[]{Pattern.compile("y$")}),
+        is(expected));
+  }
+}
\ No newline at end of file
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index 65bb86ee82..8ce2d8d7b8 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -18,6 +18,7 @@
   <modelVersion>4.0.0</modelVersion>
   <modules>
     <module>metastore-common</module>
+    <module>metastore-tools</module>
   </modules>
   <parent>
     <groupId>org.apache</groupId>
@@ -90,6 +91,7 @@
     <protobuf.version>2.5.0</protobuf.version>
     <sqlline.version>1.3.0</sqlline.version>
     <storage-api.version>2.7.0-SNAPSHOT</storage-api.version>
+    <hamcrest.version>1.3</hamcrest.version>
     <thrift.home>you-must-set-this-to-run-thrift</thrift.home>
@@ -299,6 +301,13 @@
       <version>9.3-1102-jdbc41</version>
       <scope>test</scope>
+
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-all</artifactId>
+      <version>${hamcrest.version}</version>
+      <scope>test</scope>
+    </dependency>
-- 
2.18.0