From 9ceb8576ecfefe3a4bde7d9138084616900fddc8 Mon Sep 17 00:00:00 2001
From: Alexander Kolbasov
Date: Mon, 25 Jun 2018 13:03:53 -0700
Subject: [PATCH 1/1] HIVE-19902: Provide Metastore micro-benchmarks

---
 metastore-tools/data/conf/.keepme                   |   0
 metastore-tools/metastore-benchmarks/README.md      |  56 +++
 metastore-tools/metastore-benchmarks/pom.xml        | 180 +++++++
 .../hive/metastore/tools/BenchmarkSuite.java        | 261 ++++++++++
 .../hadoop/hive/metastore/tools/BenchmarkTool.java  | 277 +++++++++++
 .../hadoop/hive/metastore/tools/HMSBenchmarks.java  | 416 ++++++++++++++++
 .../hive/metastore/tools/MicroBenchmark.java        | 123 +++++
 .../src/main/resources/log4j.properties             |   6 +
 .../src/main/resources/log4j2.xml                   |  33 ++
 metastore-tools/metastore-cli/README.md             | 103 ++++
 metastore-tools/metastore-cli/pom.xml               | 180 +++++++
 .../hadoop/hive/metastore/tools/HMSTool.java        | 468 ++++++++++++++++++
 metastore-tools/pom.xml                             | 121 +++++
 metastore-tools/tools-common/pom.xml                | 123 +++++
 .../hadoop/hive/metastore/tools/Constants.java      |  32 ++
 .../hadoop/hive/metastore/tools/HMSClient.java      | 428 +++++++++++++++++
 .../apache/hadoop/hive/metastore/tools/Util.java    | 529 +++++++++++++++++++++
 .../hadoop/hive/metastore/tools/HMSClientTest.java  | 206 ++++++++
 .../hadoop/hive/metastore/tools/UtilTest.java       |  79 +++
 pom.xml                                             |   1 +
 20 files changed, 3622 insertions(+)
 create mode 100644 metastore-tools/data/conf/.keepme
 create mode 100644 metastore-tools/metastore-benchmarks/README.md
 create mode 100644 metastore-tools/metastore-benchmarks/pom.xml
 create mode 100644 metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java
 create mode 100644 metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java
 create mode 100644 metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java
 create mode 100644 metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java
 create mode 100644 metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties
 create mode 100644 metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml
 create mode 100644 metastore-tools/metastore-cli/README.md
 create mode 100644 metastore-tools/metastore-cli/pom.xml
 create mode 100644 metastore-tools/metastore-cli/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSTool.java
 create mode 100644 metastore-tools/pom.xml
 create mode 100644 metastore-tools/tools-common/pom.xml
 create mode 100644 metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java
 create mode 100644 metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java
 create mode 100644 metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java
 create mode 100644 metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java
 create mode 100644 metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java

diff --git a/metastore-tools/data/conf/.keepme b/metastore-tools/data/conf/.keepme
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/metastore-tools/metastore-benchmarks/README.md b/metastore-tools/metastore-benchmarks/README.md
new file mode 100644
index 0000000000..a1524a772c
--- /dev/null
+++ b/metastore-tools/metastore-benchmarks/README.md
@@ -0,0 +1,56 @@
+Micro-benchmarks for the HMS Metastore.
+
+These benchmarks measure the latency of individual metastore calls (database,
+table and partition operations) against a live HMS instance.
+
+# Installation
+
+    mvn clean install -Pdist
+
+You can run the tests as well. Just set the `HMS_HOST` environment variable to an HMS instance
+that is capable of serving your requests (a non-kerberized one) and run
+
+    mvn install
+
+The target directory has a mega-jar which bundles all the dependencies.
+
+Alternatively, you can use the `bin/hbench` and `bin/hclient` scripts, which use Maven to run the code.
+
+
+# HmsBench usage
+
+    usage: hbench [options] [test]...
+      -conf            configuration directory
+      -csv             produce CSV output
+      -d,--database    database name (can be regexp for list)
+      -H,--host        HMS Server
+      -h,--help        print this info
+      -K,--separator   field separator
+      -L,--spin        spin count
+      -l,--list        list benchmarks
+      -N,--number      number of instances
+      -o,--output      output file
+      -P,--port        HMS Server port
+      -p,--partitions  partitions list
+      -S,--pattern     test patterns
+      -sanitize        sanitize results
+      -savedata        save raw data in specified dir
+      -T,--threads     number of threads
+      -v,--verbose     verbose mode
+      -W,--warm        warmup count
+
+## Using a single jar
+
+    java -jar hbench-jar-with-dependencies.jar [options] [test]...
+
+## Using hbench on a kerberized cluster
+
+    java -jar hbench-jar-with-dependencies.jar -H `hostname` [options] [test]...
+
+## Examples
+
+1. Run tests with 500 object instances, 10 warm-up cycles, excluding the drop and concurrent operations:
+
+       java -jar hbench-jar-with-dependencies.jar -H `hostname` -N 500 -W 10 !drop.* !concurrent.*
+
+2. Run tests, produce output in tab-separated format and write individual data points into the 'data' directory:
+
+       java -jar hbench-jar-with-dependencies.jar -H host.com -o result.csv -csv -savedata data
\ No newline at end of file
diff --git a/metastore-tools/metastore-benchmarks/pom.xml b/metastore-tools/metastore-benchmarks/pom.xml
new file mode 100644
index 0000000000..b4a432d288
--- /dev/null
+++ b/metastore-tools/metastore-benchmarks/pom.xml
@@ -0,0 +1,180 @@
+
+
+    hive-metastore-tools
+    org.apache.hive
+    4.0.0-SNAPSHOT
+
+  4.0.0
+
+  jar
+
+  hive-metastore-benchmarks
+  Hive metastore benchmarks
+
+
+    ..
+ + + + + org.apache.hive + metastore-tools-common + ${hive.version} + compile + + + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + + + + commons-cli + commons-cli + + + + org.apache.commons + commons-math3 + + + + org.slf4j + slf4j-log4j12 + + + + org.jetbrains + annotations + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.platform + junit-platform-runner + test + + + + org.hamcrest + hamcrest-all + test + + + + + + + dist + + + + maven-assembly-plugin + + + + + + org.apache.hadoop.hive.metastore.tools.BenchmarkTool + true + + + + jar-with-dependencies + + hmsbench + + make-assembly-hclient + package + + single + + + + + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.7.0 + + 1.8 + 1.8 + javac-with-errorprone + true + + + + + org.codehaus.plexus + plexus-compiler-javac-errorprone + ${javac.errorprone.version} + + + com.google.errorprone + error_prone_core + ${errorprone.core.version} + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven.surefire.version} + + + + + + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + + \ No newline at end of file diff --git a/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java new file mode 100644 index 0000000000..14c835160a --- /dev/null +++ b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.apache.commons.math3.stat.descriptive.rank.Median; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Formatter; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +import static org.apache.hadoop.hive.metastore.tools.Util.filterMatches; + +/** + * Group of benchmarks that can be joined together. + * Every benchmark has an associated name and code to run it. + * It is possible to run all benchmarks or only ones matching the filter.
+ *
+ * Results can be optionally sanitized: any result that falls outside of
+ * mean +/- MARGIN * stddev is removed from the result set. This helps remove
+ * random outliers.
+ *
+ * <p>Example:</p>
+ *
+ * <pre>
+ *   StringBuilder sb = new StringBuilder();
+ *   Formatter fmt = new Formatter(sb);
+ *   BenchmarkSuite suite = new BenchmarkSuite();
+ *   // Arrange various benchmarks in a suite
+ *   BenchmarkSuite result = suite
+ *       .setScale(scale)
+ *       .doSanitize(true)
+ *       .add("someBenchmark", someBenchmarkFunc)
+ *       .add("anotherBenchmark", anotherBenchmarkFunc)
+ *       .runMatching(patterns);
+ *   result.display(fmt);
+ * </pre>
+ */
+public final class BenchmarkSuite {
+  private static final Logger LOG = LoggerFactory.getLogger(BenchmarkSuite.class);
+  // Delta margin for data sanitizing. When sanitizing is enabled, we filter out
+  // all results which are outside
+  // mean +/- MARGIN * stddev
+  private static final double MARGIN = 2;
+  // Collection of benchmarks
+  private final Map<String, Supplier<DescriptiveStatistics>> suite = new HashMap<>();
+  // List of benchmark names. All benchmarks are executed in the order
+  // they are inserted
+  private final List<String> benchmarks = new ArrayList<>();
+  // Once benchmarks are executed, results are stored in a TreeMap to preserve the order.
+  private final Map<String, DescriptiveStatistics> result = new TreeMap<>();
+  // Whether sanitizing of results is requested
+  private boolean doSanitize = false;
+  // Time units - we use milliseconds.
+  private TimeUnit scale = TimeUnit.MILLISECONDS;
+
+  /**
+   * Set the scaling factor for displaying results.
+   * When data is reported, all times are divided by the scale factor.
+   * Data is always collected in nanoseconds, so this can be used to present
+   * data using different time units.
+   * @param scale time unit used for reporting results
+   * @return this for chaining
+   */
+  BenchmarkSuite setScale(TimeUnit scale) {
+    this.scale = scale;
+    return this;
+  }
+
+  /**
+   * Enable or disable result sanitization.
+   * This should be done before benchmarks are executed.
+   * @param sanitize enable sanitization if true, disable if false
+   * @return this object, allowing chained calls.
+   */
+  BenchmarkSuite doSanitize(boolean sanitize) {
+    this.doSanitize = sanitize;
+    return this;
+  }
+
+  /**
+   * Get raw benchmark results.
+   * @return map of benchmark name to the statistics describing the result
+   */
+  public Map<String, DescriptiveStatistics> getResult() {
+    return result;
+  }
+
+  /**
+   * List matching benchmarks.
+   * @param patterns list of benchmark names or their parts.
+   *                 Adding "!" in front of the name means "exclude".
+   * @return list of matching benchmark names.
+   */
+  public List<String> list(@Nullable List<String> patterns) {
+    return filterMatches(benchmarks, patterns);
+  }
+
+  /**
+   * Run all benchmarks in the 'names' list.
+   * @param names list of benchmarks to run
+   * @return this to allow chaining
+   */
+  private BenchmarkSuite runAll(List<String> names) {
+    if (doSanitize) {
+      names.forEach(name -> {
+        LOG.info("Running benchmark {}", name);
+        result.put(name, sanitize(suite.get(name).get()));
+      });
+    } else {
+      names.forEach(name -> {
+        LOG.info("Running benchmark {}", name);
+        result.put(name, suite.get(name).get());
+      });
+    }
+    return this;
+  }
+
+  /**
+   * Run all benchmarks whose name matches the list of patterns.
+   * @param patterns list of string patterns.
+   *                 Patterns prefixed with '!' mean negative match.
+   * @return this.
+   */
+  public BenchmarkSuite runMatching(@Nullable List<String> patterns) {
+    return runAll(filterMatches(benchmarks, patterns));
+  }
+
+  /**
+   * Add a new benchmark to the suite.
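+   * Benchmarks run in the order in which they are added.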
+ * @param name benchmark name + * @param b benchmark corresponding to name + * @return this + */ + public BenchmarkSuite add(@NotNull String name, @NotNull Supplier b) { + suite.put(name, b); + benchmarks.add(name); + return this; + } + + /** + * Get new statistics that excludes values beyond mean +/- 2 * stdev + * + * @param data Source data + * @return new {@link @DescriptiveStatistics objects with sanitized data} + */ + private static DescriptiveStatistics sanitize(@NotNull DescriptiveStatistics data) { + double meanValue = data.getMean(); + double delta = MARGIN * meanValue; + double minVal = meanValue - delta; + double maxVal = meanValue + delta; + return new DescriptiveStatistics(Arrays.stream(data.getValues()) + .filter(x -> x > minVal && x < maxVal) + .toArray()); + } + + /** + * Get median value for given statistics. + * @param data collected datapoints. + * @return median value. + */ + private static double median(@NotNull DescriptiveStatistics data) { + return new Median().evaluate(data.getValues()); + } + + /** + * Produce printable result + * @param fmt text formatter - destination of formatted results. + * @param name benchmark name + * @param stats benchmark data + */ + private void displayStats(@NotNull Formatter fmt, @NotNull String name, + @NotNull DescriptiveStatistics stats) { + double mean = stats.getMean(); + double err = stats.getStandardDeviation() / mean * 100; + long conv = scale.toNanos(1); + + fmt.format("%-30s %-8.4g %-8.4g %-8.4g %-8.4g %-8.4g%n", + name, + mean / conv, + median(stats) / conv, + stats.getMin() / conv, + stats.getMax() / conv, + err); + } + + /** + * Produce results in printable CSV format, separated by separator. + * @param fmt text formatter - destination of formatted results. + * @param name benchmark name + * @param stats benchmark data + * @param separator field separator + */ + private void displayCSV(@NotNull Formatter fmt, @NotNull String name, + @NotNull DescriptiveStatistics stats, @NotNull String separator) { + double mean = stats.getMean(); + double err = stats.getStandardDeviation() / mean * 100; + long conv = scale.toNanos(1); + + fmt.format("%s%s%g%s%g%s%g%s%g%s%g%n", + name, separator, + mean / conv, separator, + median(stats) / conv, separator, + stats.getMin() / conv, separator, + stats.getMax() / conv, separator, + err); + } + + /** + * Format all results + * @param fmt text formatter - destination of formatted results. + * @return this + */ + BenchmarkSuite display(Formatter fmt) { + fmt.format("%-30s %-8s %-8s %-8s %-8s %-8s%n", + "Operation", "Mean", "Med", "Min", "Max", "Err%"); + result.forEach((name, stat) -> displayStats(fmt, name, stat)); + return this; + } + + /** + * Format all results in CSV format + * @param fmt text formatter - destination of formatted results. 
+ * @param separator field separator + * @return this + */ + BenchmarkSuite displayCSV(Formatter fmt, String separator) { + fmt.format("%s%s%s%s%s%s%s%s%s%s%s%n", + "Operation", separator, "Mean", separator, "Med", separator, "Min", + separator, "Max", separator, "Err%"); + result.forEach((name, s) -> displayCSV(fmt, name, s, separator)); + return this; + } +} diff --git a/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java new file mode 100644 index 0000000000..72dd9e389b --- /dev/null +++ b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.Formatter; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.*; + +import static org.apache.hadoop.hive.metastore.tools.Constants.OPT_CONF; +import static org.apache.hadoop.hive.metastore.tools.Constants.OPT_DATABASE; +import static org.apache.hadoop.hive.metastore.tools.Constants.OPT_HOST; +import static org.apache.hadoop.hive.metastore.tools.Constants.OPT_PORT; +import static org.apache.hadoop.hive.metastore.tools.Constants.OPT_VERBOSE; +import static org.apache.hadoop.hive.metastore.tools.Util.getServerUri; + +/** + * BenchmarkTool is a top-level application for measuring Hive Metastore + * performance. 
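+ * It populates a test database with objects, times individual metastore calls
+ * with a {@link MicroBenchmark} wired into a {@link BenchmarkSuite}, and
+ * prints the collected statistics as a table or as CSV.
+ *
+ * <p>A typical invocation (host, port and database arguments are illustrative):</p>
+ * <pre>
+ *   java -jar hbench-jar-with-dependencies.jar -H localhost -P 9083 -d bench_db -N 100 -o results.csv -csv
+ * </pre>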
+ */ +final class BenchmarkTool { + private static final Logger LOG = LoggerFactory.getLogger(BenchmarkTool.class); + private static final TimeUnit scale = TimeUnit.MILLISECONDS; + private static final String CSV_SEPARATOR = "\t"; + private static final String TEST_TABLE = "bench_table"; + + private static final String OPT_SEPARATOR = "separator"; + private static final String OPT_SPIN = "spin"; + private static final String OPT_WARM = "warm"; + private static final String OPT_LIST = "list"; + private static final String OPT_SANITIZE = "sanitize"; + private static final String OPT_OUTPUT = "output"; + private static final String OPT_CSV = "csv"; + private static final String OPT_SAVEDATA = "savedata"; + private static final String OPT_PATTERN = "pattern"; + private static final String OPT_NUMBER = "number"; + + + // There is no need to instantiate BenchmarkTool class. + private BenchmarkTool() {} + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption("H", OPT_HOST, true, + "HMS Server (can also be specified with HMS_HOST environment variable)") + .addOption("P", OPT_PORT, true, "HMS Server port") + .addOption("h", "help", false, "print this info") + .addOption("d", OPT_DATABASE, true, "database name (can be regexp for list)") + .addOption("v", OPT_VERBOSE, false, "verbose mode") + .addOption("N", OPT_NUMBER, true, "number of instances") + .addOption("K", OPT_SEPARATOR, true, "field separator") + .addOption("L", OPT_SPIN, true, "spin count") + .addOption("W", OPT_WARM, true, "warmup count") + .addOption("l", OPT_LIST, true, "list benchmarks") + .addOption("o", OPT_OUTPUT, true, "output file") + .addOption(new Option(OPT_CONF, true, "configuration directory")) + .addOption(new Option(OPT_SANITIZE, false, "sanitize results")) + .addOption(new Option(OPT_CSV, false, "produce CSV output")) + .addOption(new Option(OPT_SAVEDATA, true, + "save raw data in specified dir")) + .addOption("S", OPT_PATTERN, true, "test patterns"); + + CommandLineParser parser = new DefaultParser(); + + CommandLine cmd = null; + + LOG.info("using args {}", Joiner.on(' ').join(args)); + + try { + cmd = parser.parse(options, args); + } catch (ParseException e) { + help(options); + } + + if (cmd.hasOption("help")) { + help(options); + } + + PrintStream output = System.out; + if (cmd.hasOption(OPT_OUTPUT)) { + output = new PrintStream(cmd.getOptionValue(OPT_OUTPUT)); + } + + String dbName = cmd.getOptionValue(OPT_DATABASE); + String tableName = TEST_TABLE; + + if (dbName == null || dbName.isEmpty()) { + throw new RuntimeException("Missing DB name"); + } + + LOG.info("Using table '{}.{}", dbName, tableName); + + List arguments = cmd.getArgList(); + + boolean filtertests = cmd.hasOption(OPT_PATTERN); + List patterns = filtertests ? + Lists.newArrayList(cmd.getOptionValue(OPT_PATTERN).split(",")) : + Collections.emptyList(); + // If we have arguments, they are filters on the tests, so add them. 
+ if (!arguments.isEmpty()) { + patterns = Stream.concat(patterns.stream(), arguments.stream()).collect(Collectors.toList()); + } + + try (HMSClient client = + new HMSClient(getServerUri(cmd.getOptionValue(OPT_HOST), cmd.getOptionValue(OPT_PORT)), + cmd.getOptionValue(OPT_CONF))) { + if (!client.dbExists(dbName)) { + client.createDatabase(dbName); + } + + if (client.tableExists(dbName, tableName)) { + client.dropTable(dbName, tableName); + } + + int instances = Integer.parseInt(cmd.getOptionValue(OPT_NUMBER, "100")); + int warmup = Integer.parseInt(cmd.getOptionValue(OPT_WARM, "15")); + int spin = Integer.parseInt(cmd.getOptionValue(OPT_SPIN, "100")); + LOG.info("Using " + instances + " object instances" + " warmup " + warmup + + " spin " + spin); + + final String db = dbName; + final String tbl = tableName; + + LOG.info("Using {} object instances", instances); + StringBuilder sb = new StringBuilder(); + Formatter fmt = new Formatter(sb); + + MicroBenchmark bench = new MicroBenchmark(warmup, spin); + BenchmarkSuite suite = new BenchmarkSuite(); + + // Arrange various benchmarks in a suite + BenchmarkSuite result = suite + .setScale(scale) + .doSanitize(cmd.hasOption(OPT_SANITIZE)) + .add("getNid", () -> benchmarkGetNotificationId(bench, client)) + .add("listDatabases", () -> benchmarkListDatabases(bench, client)) + .add("listTables", () -> benchmarkListAllTables(bench, client, db)) + .add("listTables" + '.' + instances, + () -> benchmarkListTables(bench, client, db, instances)) + .add("getTable", () -> benchmarkGetTable(bench, client, db, tbl)) + .add("createTable", () -> benchmarkTableCreate(bench, client, db, tbl)) + .add("dropTable", () -> benchmarkDeleteCreate(bench, client, db, tbl)) + .add("dropTableWithPartitions", + () -> benchmarkDeleteWithPartitions(bench, client, db, tbl, 1)) + .add("dropTableWithPartitions" + '.' + instances, + () -> benchmarkDeleteWithPartitions(bench, client, db, tbl, instances)) + .add("addPartition", () -> benchmarkCreatePartition(bench, client, db, tbl)) + .add("dropPartition", () -> benchmarkDropPartition(bench, client, db, tbl)) + .add("listPartition", () -> benchmarkListPartition(bench, client, db, tbl)) + .add("listPartitions" + '.' + instances, + () -> benchmarkListManyPartitions(bench, client, db, tbl, instances)) + .add("getPartition", + () -> benchmarkGetPartitions(bench, client, db, tbl, 1)) + .add("getPartitions" + '.' + instances, + () -> benchmarkGetPartitions(bench, client, db, tbl, instances)) + .add("getPartitionNames", + () -> benchmarkGetPartitionNames(bench, client, db, tbl, 1)) + .add("getPartitionNames" + '.' + instances, + () -> benchmarkGetPartitionNames(bench, client, db, tbl, instances)) + .add("getPartitionsByNames", + () -> benchmarkGetPartitionsByName(bench, client, db, tbl, 1)) + .add("getPartitionsByNames" + '.' + instances, + () -> benchmarkGetPartitionsByName(bench, client, db, tbl, instances)) + .add("addPartitions" + '.' + instances, + () -> benchmarkCreatePartitions(bench, client, db, tbl, instances)) + .add("dropPartitions" + '.' + instances, + () -> benchmarkDropPartitions(bench, client, db, tbl, instances)) + .add("renameTable", + () -> benchmarkRenameTable(bench, client, db, tbl, 1)) + .add("renameTable" + '.' + instances, + () -> benchmarkRenameTable(bench, client, db, tbl, instances)) + .add("dropDatabase", + () -> benchmarkDropDatabase(bench, client, db, 1)) + .add("dropDatabase" + '.' 
+ instances, + () -> benchmarkDropDatabase(bench, client, db, instances)) + .runMatching(patterns); + + if (cmd.hasOption(OPT_CSV)) { + result.displayCSV(fmt, CSV_SEPARATOR); + } else { + result.display(fmt); + } + + if (cmd.hasOption(OPT_OUTPUT)) { + // Print results to stdout as well + StringBuilder s = new StringBuilder(); + Formatter f = new Formatter(s); + result.display(f); + System.out.print(s); + f.close(); + } + + output.print(sb.toString()); + fmt.close(); + + if (cmd.hasOption(OPT_SAVEDATA)) { + saveData(result.getResult(), cmd.getOptionValue(OPT_SAVEDATA), scale); + } + + } + } + + private static void saveData(Map result, String location, TimeUnit scale) throws IOException { + Path dir = Paths.get(location); + if (!Files.exists(dir)) { + LOG.debug("creating directory {}", location); + Files.createDirectories(dir); + } else if (!Files.isDirectory(dir)) { + LOG.error("{} should be a directory", location); + } + + // Create a new file for each benchmark and dump raw data to it. + result.forEach((name, data) -> saveDataFile(location, name, data, scale)); + } + + private static void saveDataFile(String location, String name, + DescriptiveStatistics data, TimeUnit scale) { + long conv = scale.toNanos(1); + Path dst = Paths.get(location, name); + try (PrintStream output = new PrintStream(dst.toString())) { + // Print all values one per line + Arrays.stream(data.getValues()).forEach(d -> output.println(d / conv)); + } catch (FileNotFoundException e) { + LOG.error("failed to write to {}", dst.toString()); + } + + } + + private static void help(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("hbench ...", options); + System.exit(0); + } + +} diff --git a/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java new file mode 100644 index 0000000000..a2efca6452 --- /dev/null +++ b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.Socket; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; + +import static org.apache.hadoop.hive.metastore.tools.Util.addManyPartitions; +import static org.apache.hadoop.hive.metastore.tools.Util.addManyPartitionsNoException; +import static org.apache.hadoop.hive.metastore.tools.Util.createSchema; +import static org.apache.hadoop.hive.metastore.tools.Util.generatePartitionNames; +import static java.util.concurrent.Executors.newFixedThreadPool; +import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper; + +/** + * Actual benchmark code. + */ +final class HMSBenchmarks { + + // No one should create instances of this class + private HMSBenchmarks() { + } + + private static final Logger LOG = LoggerFactory.getLogger(HMSBenchmarks.class); + + static DescriptiveStatistics benchmarkListDatabases(MicroBenchmark benchmark, + final HMSClient client) { + return benchmark.measure(() -> + throwingSupplierWrapper(() -> client.getAllDatabases(null))); + } + + static DescriptiveStatistics benchmarkListAllTables(MicroBenchmark benchmark, + final HMSClient client, + final String dbName) { + return benchmark.measure(() -> + throwingSupplierWrapper(() -> client.getAllTables(dbName, null))); + } + + static DescriptiveStatistics benchmarkTableCreate(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName) { + Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + + return bench.measure(null, + () -> throwingSupplierWrapper(() -> client.createTable(table)), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName))); + } + + static DescriptiveStatistics benchmarkDeleteCreate(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName) { + Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + + return bench.measure( + () -> throwingSupplierWrapper(() -> client.createTable(table)), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), + null); + } + + static DescriptiveStatistics benchmarkDeleteWithPartitions(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int howMany) { + return bench.measure( + () -> throwingSupplierWrapper(() -> { + createPartitionedTable(client, dbName, tableName); + addManyPartitions(client, dbName, tableName, Collections.singletonList("d"), howMany); + return true; + }), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), + null); + } + + @SuppressWarnings("PMD") + static DescriptiveStatistics benchmarkNetworkLatency(MicroBenchmark bench, + final String server, int port) { + return bench.measure( + () -> { + //noinspection EmptyTryBlock + try (Socket socket = new Socket(server, port)) { + ; + } catch (IOException e) { + LOG.error("socket connection failed", e); + } + }); + } + + static DescriptiveStatistics benchmarkGetTable(MicroBenchmark bench, + final HMSClient 
client, + final String dbName, + final String tableName) { + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getTable(dbName, tableName))); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListTables(MicroBenchmark bench, + HMSClient client, + String dbName, + int count) { + // Create a bunch of tables + String format = "tmp_table_%d"; + try { + createManyTables(client, count, dbName, format); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getAllTables(dbName, null))); + } finally { + dropManyTables(client, count, dbName, format); + } + } + + static DescriptiveStatistics benchmarkCreatePartition(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName) { + createPartitionedTable(client, dbName, tableName); + final List values = Collections.singletonList("d1"); + try { + Table t = client.getTable(dbName, tableName); + Partition partition = new Util.PartitionBuilder(t) + .withValues(values) + .build(); + + return bench.measure(null, + () -> throwingSupplierWrapper(() -> client.addPartition(partition)), + () -> throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, values))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListPartition(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName) { + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, + Collections.singletonList("d"), 1); + + return bench.measure(() -> + throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListManyPartitions(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int howMany) { + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, Collections.singletonList("d"), howMany); + LOG.debug("Created {} partitions", howMany); + LOG.debug("started benchmark... "); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitions(final MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int howMany) { + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, Collections.singletonList("d"), howMany); + LOG.debug("Created {} partitions", howMany); + LOG.debug("started benchmark... 
"); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropPartition(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName) { + createPartitionedTable(client, dbName, tableName); + final List values = Collections.singletonList("d1"); + try { + Table t = client.getTable(dbName, tableName); + Partition partition = new Util.PartitionBuilder(t) + .withValues(values) + .build(); + + return bench.measure( + () -> throwingSupplierWrapper(() -> client.addPartition(partition)), + () -> throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, values)), + null); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkCreatePartitions(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int count) { + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure( + null, + () -> addManyPartitionsNoException(client, dbName, tableName, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropPartitions(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int count) { + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure( + () -> addManyPartitionsNoException(client, dbName, tableName, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)), + null + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitionNames(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int count) { + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, + Collections.singletonList("d"), count); + return bench.measure( + () -> throwingSupplierWrapper(() -> client.getPartitionNames(dbName, tableName)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitionsByName(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int count) { + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, + Collections.singletonList("d"), count); + List partitionNames = throwingSupplierWrapper(() -> + client.getPartitionNames(dbName, tableName)); + return bench.measure( + () -> + throwingSupplierWrapper(() -> + client.getPartitionsByNames(dbName, tableName, partitionNames)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkRenameTable(MicroBenchmark bench, + final HMSClient client, + final String dbName, + final String tableName, + int count) { + 
createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, + Collections.singletonList("d"), count); + Table oldTable = client.getTable(dbName, tableName); + oldTable.getSd().setLocation(""); + Table newTable = oldTable.deepCopy(); + newTable.setTableName(tableName + "_renamed"); + + return bench.measure( + () -> { + // Measuring 2 renames, so the tests are idempotent + throwingSupplierWrapper(() -> + client.alterTable(oldTable.getDbName(), oldTable.getTableName(), newTable)); + throwingSupplierWrapper(() -> + client.alterTable(newTable.getDbName(), newTable.getTableName(), oldTable)); + } + ); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropDatabase(MicroBenchmark bench, + final HMSClient client, + final String dbName, + int count) { + throwingSupplierWrapper(() -> client.dropDatabase(dbName)); + try { + return bench.measure( + () -> { + throwingSupplierWrapper(() -> client.createDatabase(dbName)); + createManyTables(client, count, dbName, "tmp_table_%d"); + }, + () -> throwingSupplierWrapper(() -> client.dropDatabase(dbName)), + null + ); + } finally { + throwingSupplierWrapper(() -> client.createDatabase(dbName)); + } + } + + private static void createManyTables(HMSClient client, int howMany, String dbName, String format) { + List columns = createSchema(new ArrayList<>(Arrays.asList("name", "string"))); + List partitions = createSchema(new ArrayList<>(Arrays.asList("date", "string"))); + IntStream.range(0, howMany) + .forEach(i -> + throwingSupplierWrapper(() -> client.createTable( + new Util.TableBuilder(dbName, String.format(format, i)) + .withType(TableType.MANAGED_TABLE) + .withColumns(columns) + .withPartitionKeys(partitions) + .build()))); + } + + private static void dropManyTables(HMSClient client, int howMany, String dbName, String format) { + IntStream.range(0, howMany) + .forEach(i -> + throwingSupplierWrapper(() -> client.dropTable(dbName, String.format(format, i)))); + } + + // Create a simple table with a single column and single partition + private static void createPartitionedTable(HMSClient client, String dbName, String tableName) { + throwingSupplierWrapper(() -> client.createTable( + new Util.TableBuilder(dbName, tableName) + .withType(TableType.MANAGED_TABLE) + .withColumns(createSchema(Collections.singletonList("name:string"))) + .withPartitionKeys(createSchema(Collections.singletonList("date"))) + .build())); + } + + static DescriptiveStatistics benchmarkGetNotificationId(MicroBenchmark benchmark, + final HMSClient client) { + return benchmark.measure(() -> + throwingSupplierWrapper(client::getCurrentNotificationId)); + } + +} diff --git a/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java new file mode 100644 index 0000000000..9da7306c60 --- /dev/null +++ b/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/MicroBenchmark.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** + * Micro-benchmark some piece of code.
+ *
+ * Every benchmark has three parts:
+ * <ul>
+ *   <li>Optional pre-test</li>
+ *   <li>Mandatory test</li>
+ *   <li>Optional post-test</li>
+ * </ul>
+ * Measurement consists of a warm-up phase and a measurement phase.
+ * The consumer can specify the number of times the warm-up and the measurement are repeated.
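+ *
+ * <p>A minimal usage sketch; the {@code client} call shown is just an
+ * illustration of a measured operation:</p>
+ * <pre>
+ *   MicroBenchmark bench = new MicroBenchmark(15, 100);
+ *   DescriptiveStatistics stats =
+ *       bench.measure(() -> throwingSupplierWrapper(() -> client.getAllDatabases(null)));
+ * </pre>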

+ * All time is measured in nanoseconds. + */ +class MicroBenchmark { + // Specify defaults + private static final int WARMUP_DEFAULT = 15; + private static final int ITERATIONS_DEFAULT = 100; + private static final int SCALE_DEFAULT = 1; + + private final int warmup; + private final int iterations; + private final int scaleFactor; + + /** + * Create default micro benchmark measurer + */ + public MicroBenchmark() { + this(WARMUP_DEFAULT, ITERATIONS_DEFAULT, SCALE_DEFAULT); + } + + /** + * Create micro benchmark measurer. + * @param warmup number of test calls for warmup + * @param iterations number of test calls for measurement + */ + MicroBenchmark(int warmup, int iterations) { + this(warmup, iterations, SCALE_DEFAULT); + } + + /** + * Create micro benchmark measurer. + * + * @param warmup number of test calls for warmup + * @param iterations number of test calls for measurement + * @param scaleFactor Every delta is divided by scale factor + */ + private MicroBenchmark(int warmup, int iterations, int scaleFactor) { + this.warmup = warmup; + this.iterations = iterations; + this.scaleFactor = scaleFactor; + } + + /** + * Run the benchmark and measure run-time statistics in nanoseconds.
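+   * Each iteration runs {@code pre} (untimed), times {@code test}, and then
+   * runs {@code post} (untimed).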

+ * Before the run the warm-up phase is executed. + * @param pre Optional pre-test setup + * @param test Mandatory test + * @param post Optional post-test cleanup + * @return Statistics describing the results. All times are in nanoseconds. + */ + public DescriptiveStatistics measure(@Nullable Runnable pre, + @NotNull Runnable test, + @Nullable Runnable post) { + // Warmup phase + for (int i = 0; i < warmup; i++) { + if (pre != null) { + pre.run(); + } + test.run(); + if (post != null) { + post.run(); + } + } + // Run the benchmark + DescriptiveStatistics stats = new DescriptiveStatistics(); + for (int i = 0; i < iterations; i++) { + if (pre != null) { + pre.run(); + } + long start = System.nanoTime(); + test.run(); + long end = System.nanoTime(); + stats.addValue((double)(end - start) / scaleFactor); + if (post != null) { + post.run(); + } + } + return stats; + } + + /** + * Run the benchmark and measure run-time statistics in nanoseconds.
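+   * Equivalent to {@code measure(null, test, null)}.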

+   * Before the run, the warm-up phase is executed. No pre or post operations are executed.
+   * @param test test to measure
+   * @return Statistics describing the results. All times are in nanoseconds.
+   */
+  public DescriptiveStatistics measure(@NotNull Runnable test) {
+    return measure(null, test, null);
+  }
+}
diff --git a/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties b/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties
new file mode 100644
index 0000000000..3abc88706f
--- /dev/null
+++ b/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties
@@ -0,0 +1,6 @@
+log4j.rootLogger=INFO, CA
+
+log4j.appender.CA=org.apache.log4j.ConsoleAppender
+
+log4j.appender.CA.layout=org.apache.log4j.PatternLayout
+log4j.appender.CA.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
\ No newline at end of file
diff --git a/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml b/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml
new file mode 100644
index 0000000000..dba13928f6
--- /dev/null
+++ b/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/metastore-tools/metastore-cli/README.md b/metastore-tools/metastore-cli/README.md
new file mode 100644
index 0000000000..23aa15fa7e
--- /dev/null
+++ b/metastore-tools/metastore-cli/README.md
@@ -0,0 +1,103 @@
+Simple CLI client for the HMS Metastore.
+
+# Installation
+
+    mvn clean install -Pdist
+
+You can run the tests as well. Just set the `HMS_HOST` environment variable to an HMS instance
+that is capable of serving your requests (a non-kerberized one) and run
+
+    mvn install
+
+The target directory has two mega-jars which have all the dependencies.
+
+Alternatively, you can use the `bin/hbench` and `bin/hclient` scripts, which use Maven to run the code.
+
+
+# Hclient Usage
+
+    usage: hclient list|create|addpart [name:type...]
+      -conf            configuration directory
+      -d,--database    database name (can be regexp for list)
+      -D,--drop        drop table if exists
+      -H,--host        HMS Server
+      -h,--help        print this info
+      -N,--number      number of instances
+      -P,--partitions  partitions list
+      -S,--pattern     table name pattern for bulk creation
+      -showparts       show partitions
+      -t,--table       table name (can be regexp for list)
+      -v,--verbose     verbose mode
+
+# Examples
+
+    $ export HMS_HOST=host.domain.com
+
+## List all databases and tables
+
+    $ hclient list
+    test.foo
+    test.bar
+    default.customers
+    default.impala_parquet_timestamps
+    default.impala_timestamps
+    default.sample_07
+    default.sample_08
+
+## List all tables in the default database
+
+    $ hclient list -d default
+    default.customers
+    default.impala_parquet_timestamps
+    default.impala_timestamps
+    default.sample_07
+    default.sample_08
+    default.web_logs
+    default.web_logs1
+
+## List all tables with name matching 'impala'
+
+    $ hclient list -d default -t '.*impala.*'
+    default.impala_parquet_timestamps
+    default.impala_timestamps
+
+## List table schemas for impala tables
+
+    $ hclient list -d default -t '.*impala.*' -v
+    default.impala_parquet_timestamps
+      ts: timestamp
+
+    default.impala_timestamps
+      ts: timestamp
+
+## Create new table
+
+    $ hclient create -d test_db -t test_table id:int name
+    test_db.test_table
+      id: int
+      name: string
+
+## Create table with partitions
+
+    $ hclient create -d test_db -t test_table1 -P date,country id:int name
+    test_db.test_table1
+      id: int
+      name: string
+      date: string
+      country: string
+
+## Create multiple tables at once
+
+    $ hclient create -d test_db -t test_table2 -N 3 id:int name -v
+    test_db.test_table2_1
+      id: int
+      name: string
+
+    test_db.test_table2_2
+      id: int
+      name: string
+
+    test_db.test_table2_3
+      id: int
+      name: string
diff --git a/metastore-tools/metastore-cli/pom.xml b/metastore-tools/metastore-cli/pom.xml
new file mode 100644
index 0000000000..bdd0a48ebf
--- /dev/null
+++ b/metastore-tools/metastore-cli/pom.xml
@@ -0,0 +1,180 @@
+
+
+    hive-metastore-tools
+    org.apache.hive
+    4.0.0-SNAPSHOT
+
+  4.0.0
+
+  metastore-cli
+  Hive metastore CLI
+
+  jar
+
+
+    ..
+ + + + + org.apache.hive + metastore-tools-common + ${hive.version} + compile + + + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + + + + commons-cli + commons-cli + + + + org.apache.commons + commons-math3 + + + + org.slf4j + slf4j-log4j12 + + + + org.jetbrains + annotations + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.platform + junit-platform-runner + test + + + + org.hamcrest + hamcrest-all + test + + + + + + + dist + + + + maven-assembly-plugin + + + + + + org.apache.hadoop.hive.metastore.tools.HMSTool + true + + + + jar-with-dependencies + + hmsclient + + make-assembly-hclient + package + + single + + + + + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.7.0 + + 1.8 + 1.8 + javac-with-errorprone + true + + + + + org.codehaus.plexus + plexus-compiler-javac-errorprone + ${javac.errorprone.version} + + + com.google.errorprone + error_prone_core + ${errorprone.core.version} + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven.surefire.version} + + + + + + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + + \ No newline at end of file diff --git a/metastore-tools/metastore-cli/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSTool.java b/metastore-tools/metastore-cli/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSTool.java new file mode 100644 index 0000000000..11c00b2a51 --- /dev/null +++ b/metastore-tools/metastore-cli/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSTool.java @@ -0,0 +1,468 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.hadoop.hive.metastore.tools.Util.PartitionBuilder; +import com.google.common.base.Joiner; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.hive.common.LogUtils; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.hive.metastore.tools.Util.addManyPartitions; +import static org.apache.hadoop.hive.metastore.tools.Util.createSchema; +import static org.apache.hadoop.hive.metastore.tools.Util.getServerUri; +import static java.util.concurrent.Executors.newFixedThreadPool; +import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper; + +// TODO Handle HADOOP_CONF_DIR and HADOOP_HOME + +/** + * Command-line Hive metastore tool. + */ +final class HMSTool { + static final String DBNAME = "default"; + private static final Logger LOG = LoggerFactory.getLogger(HMSTool.class); + + private static final String OPT_PARTITIONS = "partitions"; + private static final String OPT_COLUMNS = "columns"; + private static final String OPT_TABLE = "table"; + private static final String OPT_DROP = "drop"; + private static final String OPT_NUMBER = "number"; + static final String OPT_PATTERN = "pattern"; + private static final String OPT_SHOW_PARTS = "showparts"; + + private static final String DEFAULT_PATTERN = "%s_%d"; + + private static final String CMD_LIST = "list"; + private static final String CMD_CREATE = "create"; + private static final String CMD_ADD_PART = "addpart"; + private static final String CMD_DROP = "drop"; + private static final String CMD_LIST_NID = "currnid"; + private static final String CMD_RENAME = "rename"; + private static final String CMD_DROPDB = "dropdb"; + + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption("H", Constants.OPT_HOST, true, "HMS Server") + .addOption("P", Constants.OPT_PORT, true, "HMS Server port") + .addOption("p", OPT_PARTITIONS, true, "partitions list") + .addOption("c", OPT_COLUMNS, true, "column schema") + .addOption("h", "help", false, "print this info") + .addOption("d", Constants.OPT_DATABASE, true, "database name (can be regexp for list)") + .addOption("t", OPT_TABLE, true, "table name (can be regexp for list)") + .addOption("v", Constants.OPT_VERBOSE, false, "verbose mode") + .addOption("N", OPT_NUMBER, true, "number of instances") + .addOption("S", OPT_PATTERN, true, "table name pattern for bulk creation") + .addOption(new Option(Constants.OPT_CONF, true, "configuration directory")) + .addOption(new Option(OPT_SHOW_PARTS, false, "show partitions")) + .addOption("D", OPT_DROP, false, "drop table if exists"); + + CommandLineParser parser = new DefaultParser(); + + CommandLine cmd = null; + + try { + cmd = parser.parse(options, args); + } catch (ParseException e) { + help(options); + System.exit(1); + } + + if 
(cmd.hasOption("help")) { + help(options); + } + + List arguments = cmd.getArgList(); + String command = CMD_LIST; + if (!arguments.isEmpty()) { + command = arguments.get(0); + arguments = arguments.subList(1, arguments.size()); + } + + LogUtils.initHiveLog4j(); + + try (HMSClient client = + new HMSClient(getServerUri(cmd.getOptionValue(Constants.OPT_HOST), cmd.getOptionValue(Constants.OPT_PORT)), + cmd.getOptionValue(Constants.OPT_CONF))) { + switch (command) { + case CMD_LIST: + cmdDisplayTables(client, cmd); + break; + case CMD_CREATE: + cmdCreate(client, cmd, arguments); + break; + case CMD_ADD_PART: + cmdAddPart(client, cmd, arguments); + break; + case CMD_DROP: + cmdDrop(client, cmd, arguments); + break; + case CMD_RENAME: + cmdRename(client, cmd, arguments); + break; + case CMD_LIST_NID: + System.out.println(client.getCurrentNotificationId()); + break; + case CMD_DROPDB: + cmdDropDatabase(client, cmd); + break; + default: + LOG.warn("Unknown command '" + command + "'"); + System.exit(1); + } + } + } + + private static void help(Options options) { + HelpFormatter formater = new HelpFormatter(); + formater.printHelp("hclient list|create|addpart [name:type...]", options); + System.exit(0); + } + + private static void cmdDisplayTables(HMSClient client, CommandLine cmd) throws TException { + String dbName = cmd.getOptionValue(Constants.OPT_DATABASE); + String tableName = cmd.getOptionValue(OPT_TABLE); + boolean verbose = cmd.hasOption(Constants.OPT_VERBOSE); + boolean showPartitions = cmd.hasOption(OPT_SHOW_PARTS); + + for (String database : client.getAllDatabases(dbName)) { + client.getAllTables(database, tableName) + .stream() + .sorted() + .forEach(tblName -> { + if (verbose) { + Table table = throwingSupplierWrapper(() -> client.getTable(database, tblName)); + displayTableSchema(table); + if (showPartitions) { + System.out.println("\t\t" + Joiner.on("\n\t\t") + .join(throwingSupplierWrapper(() -> + client.getPartitionNames(database, tblName)))); + } + System.out.println(); + } else { + System.out.println(database + "." + tblName); + } + }); + } + } + + private static void cmdCreate(HMSClient client, CommandLine cmd, List arguments) + throws TException { + String dbName = cmd.getOptionValue(Constants.OPT_DATABASE); + String tableName = cmd.getOptionValue(OPT_TABLE); + + if (tableName != null && tableName.contains(".")) { + String[] parts = tableName.split("\\."); + dbName = parts[0]; + tableName = parts[1]; + } + + boolean multiple = false; + int nTables = 0; + if (cmd.hasOption(OPT_NUMBER)) { + nTables = Integer.valueOf(cmd.getOptionValue(OPT_NUMBER, "0")); + if (nTables > 0) { + multiple = true; + } + } + + if (dbName == null) { + dbName = DBNAME; + } + + if (tableName == null) { + LOG.warn("Missing table name"); + System.exit(1); + } + + if (!client.dbExists(dbName)) { + client.createDatabase(dbName); + } else { + LOG.warn("Database '" + dbName + "' already exist"); + } + + String partitionsInfo = cmd.getOptionValue(OPT_PARTITIONS); + String[] partitions = partitionsInfo == null ? null : partitionsInfo.split(","); + List partitionInfo = partitions == null ? 
+ Collections.emptyList() : + new ArrayList<>(Arrays.asList(partitions)); + + if (!multiple) { + if (dropTableIfExists(client, cmd, dbName, tableName)) + return; + + client.createTable(new Util.TableBuilder(dbName, tableName) + .withColumns(createSchema(arguments)) + .withPartitionKeys(createSchema(partitionInfo)) + .build()); + LOG.info("Created table '" + tableName + "'"); + } else { + Set tables = client.getAllTables(dbName, null); + for (int i = 1; i <= nTables; i++) { + String pattern = cmd.getOptionValue(OPT_PATTERN, DEFAULT_PATTERN); + String tbl = String.format(pattern, tableName, i); + if (tables.contains(tbl)) { + if (cmd.hasOption(OPT_DROP)) { + LOG.warn("Dropping existing table '" + tbl + "'"); + client.dropTable(dbName, tbl); + } else { + LOG.warn("Table '" + tbl + "' already exist"); + break; + } + } + + client.createTable(new Util.TableBuilder(dbName, tableName) + .withColumns(createSchema(arguments)) + .withPartitionKeys(createSchema(partitionInfo)) + .build()); + tables.add(tbl); + } + } + } + + private static boolean dropTableIfExists(HMSClient client, CommandLine cmd, String dbName, + String tableName) throws TException { + if (client.tableExists(dbName, tableName)) { + if (cmd.hasOption(OPT_DROP)) { + LOG.warn("Dropping existing table '" + tableName + "'"); + client.dropTable(dbName, tableName); + } else { + LOG.warn("Table '" + tableName + "' already exist"); + return true; + } + } + return false; + } + + private static void cmdAddPart(HMSClient client, CommandLine cmd, List arguments) + throws TException { + String dbName = cmd.getOptionValue(Constants.OPT_DATABASE); + String tableName = cmd.getOptionValue(OPT_TABLE); + + if (tableName != null && tableName.contains(".")) { + String[] parts = tableName.split("\\."); + dbName = parts[0]; + tableName = parts[1]; + } + + if (cmd.hasOption(OPT_NUMBER)) { + int nPartitions = Integer.parseInt(cmd.getOptionValue(OPT_NUMBER)); + addManyPartitions(client, dbName, tableName, arguments, nPartitions); + } else { + addPartition(client, dbName, tableName, arguments); + } + } + + private static String getPrefixedTableName(int i, String tableName) { + return new StringBuilder("client_") + .append(i) + .append("_") + .append(tableName).toString(); + } + + private static void loadTable(final CommandLine cmd, final int totalPartitions, + final String dbName, final String tableName, final int preLoadedPartitions) throws Exception { + List partitions = new ArrayList<>(totalPartitions); + // insert overwrite simulation begins here. 
It will alter preLoadedPartitions partitions + // and create (totalPartitions - preLoadedPartitions) new partitions + try (HMSClient client = new HMSClient( + getServerUri(cmd.getOptionValue(Constants.OPT_HOST), cmd.getOptionValue(Constants.OPT_PORT)), + cmd.getOptionValue(Constants.OPT_CONF))) { + List values = new ArrayList<>(1); + Table table = client.getTable(dbName, tableName); + for (int i = 0; i < totalPartitions; i++) { + values.add(String.valueOf(i)); + Partition partition = new PartitionBuilder(table).withValues(values).build(); + if (i < preLoadedPartitions) { + //partition is preloaded so treat it as a static partition alter + client.alterPartition(dbName, tableName, partition); + } else { + //add the new dynamic partition + client.appendPartition(dbName, tableName, values); + } + partitions.add(partition); + values.clear(); + } + //one alter_partitions call to simulate stats task + client.alterPartitions(dbName, tableName, partitions); + } + } + + private static void loadTableInParallel(final CommandLine cmd, final int totalPartitions, + final String dbName, final String tableName, final int numClients, + final int preloadedPartitions) { + ExecutorService executor = newFixedThreadPool(numClients); + try { + List> results = new ArrayList<>(); + //start from 1 till numClients + for (int i = 1; i <= numClients; i++) { + final int j = i; + results.add(executor.submit(() -> { + try { + loadTable(cmd, totalPartitions, dbName, getPrefixedTableName(j, tableName), preloadedPartitions); + } catch (Exception e) { + throw new RuntimeException(e); + } + })); + } + // Wait for results + results.forEach(r -> throwingSupplierWrapper(r::get)); + } finally { + executor.shutdownNow(); + } + } + + private static void cmdDrop(HMSClient client, CommandLine cmd, List arguments) + throws TException { + String dbName = cmd.getOptionValue(Constants.OPT_DATABASE); + String tableName = cmd.getOptionValue(OPT_TABLE); + + if (tableName != null && tableName.contains(".")) { + String[] parts = tableName.split("\\."); + dbName = parts[0]; + tableName = parts[1]; + } + if (dbName == null || dbName.isEmpty()) { + System.out.println("Missing database name"); + System.exit(1); + } + if (!arguments.isEmpty()) { + // Drop partition case + if (tableName == null || tableName.isEmpty()) { + System.out.println("Missing table name"); + System.exit(1); + } + client.dropPartition(dbName, tableName, arguments); + } else { + dropTables(client, dbName, tableName); + } + } + + /** + * Rename table + * @param client + * @param cmd + * @param arguments + * @throws TException + */ + private static void cmdRename(HMSClient client, CommandLine cmd, List arguments) + throws TException { + String dbName = cmd.getOptionValue(Constants.OPT_DATABASE); + String tableName = cmd.getOptionValue(OPT_TABLE); + + if (arguments.isEmpty()) { + System.out.println("Missing new name for rename"); + System.exit(1); + } + if (arguments.size() > 1) { + System.out.println("too many arguments for rename"); + System.exit(1); + } + + if (tableName != null && tableName.contains(".")) { + String[] parts = tableName.split("\\."); + dbName = parts[0]; + tableName = parts[1]; + } + + if (tableName == null) { + LOG.warn("Missing table name"); + System.exit(1); + } + if (dbName == null) { + dbName = DBNAME; + } + + String newName = arguments.get(0); + String oldTblName = dbName + "." + tableName; + String newTblName = dbName + "." 
+ newName; + LOG.info("Renaming {} to {}", oldTblName, newTblName); + Table oldTable = client.getTable(dbName, tableName); + Table newTable = oldTable.deepCopy(); + newTable.setTableName(newName); + oldTable.getSd().setLocation(""); + client.alterTable(dbName, tableName, newTable); + } + + + private static void dropTables(HMSClient client, String dbName, String tableName) + throws TException { + for (String database : client.getAllDatabases(dbName)) { + client.getAllTables(database, tableName) + .stream() + .sorted() + .forEach(tblName -> + throwingSupplierWrapper(() -> client.dropTable(dbName, tblName))); + } + } + + private static void cmdDropDatabase(HMSClient client, CommandLine cmd) throws TException { + String dbName = cmd.getOptionValue(Constants.OPT_DATABASE); + if (dbName == null) { + LOG.warn("Missing database"); + System.exit(1); + } + client.dropDatabase(dbName); + } + + private static void addPartition(HMSClient client, String dbName, String tableName, + List values) throws TException { + if (dbName == null || dbName.isEmpty()) { + System.out.println("Missing database name"); + System.exit(1); + } + if (tableName == null || tableName.isEmpty()) { + System.out.println("Missing Table name"); + System.exit(1); + } + + Table table = client.getTable(dbName, tableName); + client.createPartition(table, values); + } + + private static void displayTableSchema(Table table) { + String dbName = table.getDbName(); + String tableName = table.getTableName(); + System.out.println(dbName + "." + tableName); + table.getSd().getCols() + .forEach(schema -> System.out.println("\t" + schema.getName() + ":\t" + schema.getType())); + table.getPartitionKeys() + .forEach(schema -> System.out.println("\t " + schema.getName() + ":\t" + schema.getType())); + } + +} diff --git a/metastore-tools/pom.xml b/metastore-tools/pom.xml new file mode 100644 index 0000000000..dbd82daf9e --- /dev/null +++ b/metastore-tools/pom.xml @@ -0,0 +1,121 @@ + + + + + hive + org.apache.hive + 4.0.0-SNAPSHOT + + 4.0.0 + + hive-metastore-tools + Hive Metastore Tools + 4.0.0-SNAPSHOT + + pom + + + metastore-benchmarks + tools-common + metastore-cli + + + + 4.0.0-SNAPSHOT + 2.20.1 + ${basedir}/checkstyle + UTF-8 + UTF-8 + 2.8 + 2.3.1 + + + + + + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + ${hive.version} + + + org.apache.hive + hive-common + ${hive.version} + + + org.apache.hive + hive-standalone-metastore + ${hive.version} + + + + commons-cli + commons-cli + 1.4 + + + + org.apache.commons + commons-math3 + 3.6.1 + + + + org.slf4j + slf4j-log4j12 + 1.7.25 + + + + org.jetbrains + annotations + 16.0.2 + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + org.junit.jupiter + junit-jupiter-api + 5.2.0 + test + + + + org.junit.platform + junit-platform-runner + 1.2.0 + + + junit + junit + ${junit.version} + + + + org.hamcrest + hamcrest-all + 1.3 + + + + + \ No newline at end of file diff --git a/metastore-tools/tools-common/pom.xml b/metastore-tools/tools-common/pom.xml new file mode 100644 index 0000000000..0cf7b3802f --- /dev/null +++ b/metastore-tools/tools-common/pom.xml @@ -0,0 +1,123 @@ + + + + + hive-metastore-tools + org.apache.hive + 4.0.0-SNAPSHOT + + 4.0.0 + + jar + + metastore-tools-common + Hive Metastore Tools common libraries + + + + .. 
+ + + + + org.apache.hive + hive-standalone-metastore + + + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + + + + org.jetbrains + annotations + compile + + + org.apache.hive + hive-common + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.hamcrest + hamcrest-all + test + + + junit + junit + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.7.0 + + 1.8 + 1.8 + javac-with-errorprone + true + + + + + org.codehaus.plexus + plexus-compiler-javac-errorprone + ${javac.errorprone.version} + + + com.google.errorprone + error_prone_core + ${errorprone.core.version} + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven.surefire.version} + + + + + + + + + org.apache.maven.plugins + maven-jxr-plugin + 2.5 + + + + + + \ No newline at end of file diff --git a/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java b/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java new file mode 100644 index 0000000000..6e2b853133 --- /dev/null +++ b/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools; + +/** + * Common constants for metastore tools. + */ +public final class Constants { + static final String OPT_HOST = "host"; + static final String OPT_PORT = "port"; + static final String OPT_DATABASE = "database"; + static final String OPT_CONF = "conf"; + static final String OPT_VERBOSE = "verbose"; + + // Disable object construction + private Constants() {} +} diff --git a/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java b/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java new file mode 100644 index 0000000000..7cc1e42a8b --- /dev/null +++ b/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java @@ -0,0 +1,428 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.DropPartitionsRequest; +import org.apache.hadoop.hive.metastore.api.DropPartitionsResult; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.RequestPartsSpec; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.SecurityUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.protocol.TCompactProtocol; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TFramedTransport; +import org.apache.thrift.transport.TSocket; +import org.apache.thrift.transport.TTransport; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.security.auth.login.LoginException; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + *  Wrapper for Thrift HMS interface. + */ +final class HMSClient implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(HMSClient.class); + private static final String METASTORE_URI = "hive.metastore.uris"; + private static final String CONFIG_DIR = "/etc/hive/conf"; + private static final String HIVE_SITE = "hive-site.xml"; + private static final String CORE_SITE = "core-site.xml"; + private static final String PRINCIPAL_KEY = "hive.metastore.kerberos.principal"; + + private final String confDir; + private ThriftHiveMetastore.Iface client; + private TTransport transport; + private URI serverURI; + + public URI getServerURI() { + return serverURI; + } + + @Override + public String toString() { + return serverURI.toString(); + } + + HMSClient(@Nullable URI uri) + throws TException, IOException, InterruptedException, LoginException, URISyntaxException { + this(uri, CONFIG_DIR); + } + + HMSClient(@Nullable URI uri, @Nullable String confDir) + throws TException, IOException, InterruptedException, LoginException, URISyntaxException { + this.confDir = confDir == null ? CONFIG_DIR : confDir; + getClient(uri); + } + + private void addResource(Configuration conf, @NotNull String r) throws MalformedURLException { + File f = new File(confDir + "/" + r); + if (f.exists() && !f.isDirectory()) { + LOG.debug("Adding configuration resource {}", r); + conf.addResource(f.toURI().toURL()); + } else { + LOG.debug("Configuration {} does not exist", r); + } + } + + /** + * Create a client to Hive Metastore. 
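+   * <p>
+   * A minimal usage sketch of the client this sets up (illustrative only; assumes a
+   * metastore listening on the default local port, exception handling omitted):
+   * <pre>{@code
+   *   try (HMSClient client = new HMSClient(new URI("thrift://localhost:9083"))) {
+   *     Set<String> databases = client.getAllDatabases(null);
+   *   }
+   * }</pre>
+   * <p>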
+ * If principal is specified, create kerberised client. + * + * @param uri server uri + * @throws MetaException if fails to login using kerberos credentials + * @throws IOException if fails connecting to metastore + * @throws InterruptedException if interrupted during kerberos setup + */ + private void getClient(@Nullable URI uri) + throws TException, IOException, InterruptedException, URISyntaxException, LoginException { + Configuration conf = new HiveConf(); + addResource(conf, HIVE_SITE); + if (uri != null) { + conf.set(METASTORE_URI, uri.toString()); + } + + // Pick up the first URI from the list of available URIs + serverURI = uri != null ? + uri : + new URI(conf.get(METASTORE_URI).split(",")[0]); + + String principal = conf.get(PRINCIPAL_KEY); + + if (principal == null) { + open(conf, serverURI); + return; + } + + LOG.debug("Opening kerberos connection to HMS"); + addResource(conf, CORE_SITE); + + Configuration hadoopConf = new Configuration(); + addResource(hadoopConf, HIVE_SITE); + addResource(hadoopConf, CORE_SITE); + + // Kerberos magic + UserGroupInformation.setConfiguration(hadoopConf); + UserGroupInformation.getLoginUser() + .doAs((PrivilegedExceptionAction) + () -> open(conf, serverURI)); + } + + boolean dbExists(@NotNull String dbName) throws TException { + return getAllDatabases(dbName).contains(dbName); + } + + boolean tableExists(@NotNull String dbName, @NotNull String tableName) throws TException { + return getAllTables(dbName, tableName).contains(tableName); + } + + Database getDatabase(@NotNull String dbName) throws TException { + return client.get_database(dbName); + } + + /** + * Return all databases with name matching the filter. + * + * @param filter Regexp. Can be null or empty in which case everything matches + * @return list of database names matching the filter + * @throws MetaException + */ + Set getAllDatabases(@Nullable String filter) throws TException { + if (filter == null || filter.isEmpty()) { + return new HashSet<>(client.get_all_databases()); + } + return client.get_all_databases() + .stream() + .filter(n -> n.matches(filter)) + .collect(Collectors.toSet()); + } + + Set getAllTables(@NotNull String dbName, @Nullable String filter) throws TException { + if (filter == null || filter.isEmpty()) { + return new HashSet<>(client.get_all_tables(dbName)); + } + return client.get_all_tables(dbName) + .stream() + .filter(n -> n.matches(filter)) + .collect(Collectors.toSet()); + } + + /** + * Create database with the given name if it doesn't exist + * + * @param name database name + */ + boolean createDatabase(@NotNull String name) throws TException { + return createDatabase(name, null, null, null); + } + + /** + * Create database if it doesn't exist + * + * @param name Database name + * @param description Database description + * @param location Database location + * @param params Database params + * @throws TException if database exists + */ + boolean createDatabase(@NotNull String name, + @Nullable String description, + @Nullable String location, + @Nullable Map params) + throws TException { + Database db = new Database(name, description, location, params); + client.create_database(db); + return true; + } + + boolean createDatabase(Database db) throws TException { + client.create_database(db); + return true; + } + + boolean dropDatabase(@NotNull String dbName) throws TException { + client.drop_database(dbName, true, true); + return true; + } + + boolean createTable(Table table) throws TException { + client.create_table(table); + return true; + } + + boolean 
dropTable(@NotNull String dbName, @NotNull String tableName) throws TException { + client.drop_table(dbName, tableName, true); + return true; + } + + Table getTable(@NotNull String dbName, @NotNull String tableName) throws TException { + return client.get_table(dbName, tableName); + } + + Partition createPartition(@NotNull Table table, @NotNull List values) throws TException { + return client.add_partition(new Util.PartitionBuilder(table).withValues(values).build()); + } + + Partition addPartition(@NotNull Partition partition) throws TException { + return client.add_partition(partition); + } + + void addPartitions(List partitions) throws TException { + client.add_partitions(partitions); + } + + + List listPartitions(@NotNull String dbName, + @NotNull String tableName) throws TException { + return client.get_partitions(dbName, tableName, (short) -1); + } + + Long getCurrentNotificationId() throws TException { + return client.get_current_notificationEventId().getEventId(); + } + + List getPartitionNames(@NotNull String dbName, + @NotNull String tableName) throws TException { + return client.get_partition_names(dbName, tableName, (short) -1); + } + + public boolean dropPartition(@NotNull String dbName, @NotNull String tableName, + @NotNull List arguments) + throws TException { + return client.drop_partition(dbName, tableName, arguments, true); + } + + List getPartitions(@NotNull String dbName, @NotNull String tableName) throws TException { + return client.get_partitions(dbName, tableName, (short) -1); + } + + DropPartitionsResult dropPartitions(@NotNull String dbName, @NotNull String tableName, + @Nullable List partNames) throws TException { + if (partNames == null) { + return dropPartitions(dbName, tableName, getPartitionNames(dbName, tableName)); + } + if (partNames.isEmpty()) { + return null; + } + return client.drop_partitions_req(new DropPartitionsRequest(dbName, + tableName, RequestPartsSpec.names(partNames))); + } + + List getPartitionsByNames(@NotNull String dbName, @NotNull String tableName, + @Nullable List names) throws TException { + if (names == null) { + return client.get_partitions_by_names(dbName, tableName, + getPartitionNames(dbName, tableName)); + } + return client.get_partitions_by_names(dbName, tableName, names); + } + + boolean alterTable(@NotNull String dbName, @NotNull String tableName, @NotNull Table newTable) + throws TException { + client.alter_table(dbName, tableName, newTable); + return true; + } + + void alterPartition(@NotNull String dbName, @NotNull String tableName, + @NotNull Partition partition) throws TException { + client.alter_partition(dbName, tableName, partition); + } + + void alterPartitions(@NotNull String dbName, @NotNull String tableName, + @NotNull List partitions) throws TException { + client.alter_partitions(dbName, tableName, partitions); + } + + void appendPartition(@NotNull String dbName, @NotNull String tableName, + @NotNull List partitionValues) throws TException { + client.append_partition_with_environment_context(dbName, tableName, partitionValues, null); + } + + private TTransport open(Configuration conf, @NotNull URI uri) throws + TException, IOException, LoginException { + boolean useSSL = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_SSL); + boolean useSasl = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_SASL); + boolean useFramedTransport = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_FRAMED_TRANSPORT); + boolean useCompactProtocol = MetastoreConf.getBoolVar(conf, 
MetastoreConf.ConfVars.USE_THRIFT_COMPACT_PROTOCOL); + int clientSocketTimeout = (int) MetastoreConf.getTimeVar(conf, + MetastoreConf.ConfVars.CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS); + + LOG.debug("Connecting to {}, framedTransport = {}", uri, useFramedTransport); + + String host = uri.getHost(); + int port = uri.getPort(); + + // Sasl/SSL code is copied from HiveMetastoreCLient + if (!useSSL) { + transport = new TSocket(host, port, clientSocketTimeout); + } else { + String trustStorePath = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.SSL_TRUSTSTORE_PATH).trim(); + if (trustStorePath.isEmpty()) { + throw new IllegalArgumentException(MetastoreConf.ConfVars.SSL_TRUSTSTORE_PATH.toString() + + " Not configured for SSL connection"); + } + String trustStorePassword = + MetastoreConf.getPassword(conf, MetastoreConf.ConfVars.SSL_TRUSTSTORE_PASSWORD); + + // Create an SSL socket and connect + transport = SecurityUtils.getSSLSocket(host, port, clientSocketTimeout, + trustStorePath, trustStorePassword); + LOG.info("Opened an SSL connection to metastore, current connections"); + } + + if (useSasl) { + // Wrap thrift connection with SASL for secure connection. + HadoopThriftAuthBridge.Client authBridge = + HadoopThriftAuthBridge.getBridge().createClient(); + + // check if we should use delegation tokens to authenticate + // the call below gets hold of the tokens if they are set up by hadoop + // this should happen on the map/reduce tasks if the client added the + // tokens into hadoop's credential store in the front end during job + // submission. + String tokenSig = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.TOKEN_SIGNATURE); + // tokenSig could be null + String tokenStrForm = SecurityUtils.getTokenStrForm(tokenSig); + + if (tokenStrForm != null) { + LOG.info("HMSC::open(): Found delegation token. Creating DIGEST-based thrift connection."); + // authenticate using delegation tokens via the "DIGEST" mechanism + transport = authBridge.createClientTransport(null, host, + "DIGEST", tokenStrForm, transport, + MetaStoreUtils.getMetaStoreSaslProperties(conf, useSSL)); + } else { + LOG.info("HMSC::open(): Could not find delegation token. Creating KERBEROS-based thrift connection."); + String principalConfig = + MetastoreConf.getVar(conf, MetastoreConf.ConfVars.KERBEROS_PRINCIPAL); + transport = authBridge.createClientTransport( + principalConfig, host, "KERBEROS", null, + transport, MetaStoreUtils.getMetaStoreSaslProperties(conf, useSSL)); + } + } else { + if (useFramedTransport) { + transport = new TFramedTransport(transport); + } + } + + final TProtocol protocol; + if (useCompactProtocol) { + protocol = new TCompactProtocol(transport); + } else { + protocol = new TBinaryProtocol(transport); + } + client = new ThriftHiveMetastore.Client(protocol); + if (!transport.isOpen()) { + transport.open(); + LOG.info("Opened a connection to metastore, current connections"); + + if (!useSasl && MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.EXECUTE_SET_UGI)) { + // Call set_ugi, only in unsecure mode. + try { + UserGroupInformation ugi = SecurityUtils.getUGI(); + client.set_ugi(ugi.getUserName(), Arrays.asList(ugi.getGroupNames())); + } catch (LoginException e) { + LOG.warn("Failed to do login. 
set_ugi() is not successful, " + + "Continuing without it.", e); + } catch (IOException e) { + LOG.warn("Failed to find ugi of client set_ugi() is not successful, " + + "Continuing without it.", e); + } catch (TException e) { + LOG.warn("set_ugi() not successful, Likely cause: new client talking to old server. " + + "Continuing without it.", e); + } + } + } + + LOG.debug("Connected to metastore, using compact protocol = {}", useCompactProtocol); + return transport; + } + + @Override + public void close() throws Exception { + if (transport != null && transport.isOpen()) { + LOG.debug("Closing thrift transport"); + transport.close(); + } + } +} diff --git a/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java b/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java new file mode 100644 index 0000000000..d0874b4ec5 --- /dev/null +++ b/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Util.java @@ -0,0 +1,529 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import com.google.common.base.Joiner; +import com.google.common.net.HostAndPort; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * Helper utilities. The Util class is just a placeholder for static methods, + * it should be never instantiated. 
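+ * <p>
+ * A typical use of the builders defined here (an illustrative sketch; the
+ * database and table names are placeholders):
+ * <pre>{@code
+ *   Table table = new Util.TableBuilder("testdb", "testtable")
+ *       .withColumns(Util.createSchema(Arrays.asList("id:int", "name")))
+ *       .build();
+ * }</pre>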
+ */ +final class Util { + private static final String DEFAULT_TYPE = "string"; + private static final String TYPE_SEPARATOR = ":"; + private static final String THRIFT_SCHEMA = "thrift"; + static final String DEFAULT_HOST = "localhost"; + private static final int DEFAULT_PORT = 9083; + private static final String ENV_SERVER = "HMS_HOST"; + private static final String ENV_PORT = "HMS_PORT"; + private static final String PROP_HOST = "hms.host"; + private static final String PROP_PORT = "hms.port"; + + private static final String HIVE_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.HiveInputFormat"; + private static final String HIVE_OUTPUT_FORMAT = "org.apache.hadoop.hive.ql.io.HiveOutputFormat"; + private static final String LAZY_SIMPLE_SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; + + private static final Logger LOG = LoggerFactory.getLogger(Util.class); + + // Disable public constructor + private Util() {} + + /** + * Wrapper that moves all checked exceptions to RuntimeException + * + * @param throwingSupplier Supplier that throws Exception + * @param Supplier return type + * @return Supplier that throws unchecked exception + */ + public static T throwingSupplierWrapper(ThrowingSupplier throwingSupplier) { + try { + return throwingSupplier.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Version if the SUpplier that can throw exceptions + * + * @param + * @param + */ + @FunctionalInterface + public interface ThrowingSupplier { + T get() throws E; + } + + /** + * A builder for Database. The name of the new database is required. Everything else + * selects reasonable defaults. + * This is a modified version of Hive 3.0 DatabaseBuilder. + */ + public static class DatabaseBuilder { + private String name; + private String description; + private String location; + private String ownerName; + private PrincipalType ownerType; + private Map params = null; + + // Disable default constructor + private DatabaseBuilder() { + } + + /** + * Constructor from database name. + * @param name Database name + */ + public DatabaseBuilder(@NotNull String name) { + this.name = name; + ownerType = PrincipalType.USER; + } + + /** + * Add database description. + * @param description Database description string. + * @return this + */ + public DatabaseBuilder withDescription(@NotNull String description) { + this.description = description; + return this; + } + + /** + * Add database location + * @param location Database location string + * @return this + */ + public DatabaseBuilder withLocation(@NotNull String location) { + this.location = location; + return this; + } + + /** + * Add Database parameters + * @param params database parameters + * @return this + */ + public DatabaseBuilder withParams(@NotNull Map params) { + this.params = params; + return this; + } + + /** + * Add a single database parameter. 
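+ * For example (illustrative):
+ * {@code new DatabaseBuilder("testdb").withParam("creator", "hive").build()}.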
+ * @param key parameter key + * @param val parameter value + * @return this + */ + public DatabaseBuilder withParam(@NotNull String key, @NotNull String val) { + if (this.params == null) { + this.params = new HashMap<>(); + } + this.params.put(key, val); + return this; + } + + /** + * Add database owner name + * @param ownerName new owner name + * @return this + */ + public DatabaseBuilder withOwnerName(@NotNull String ownerName) { + this.ownerName = ownerName; + return this; + } + + /** + * Add owner tyoe + * @param ownerType database owner type (USER or GROUP) + * @return this + */ + public DatabaseBuilder withOwnerType(PrincipalType ownerType) { + this.ownerType = ownerType; + return this; + } + + /** + * Build database object + * @return database + */ + public Database build() { + Database db = new Database(name, description, location, params); + if (ownerName != null) { + db.setOwnerName(ownerName); + } + if (ownerType != null) { + db.setOwnerType(ownerType); + } + return db; + } + } + + static class TableBuilder { + private final String dbName; + private final String tableName; + private TableType tableType = TableType.MANAGED_TABLE; + private String location; + private String serde = LAZY_SIMPLE_SERDE; + private String owner; + private List columns; + private List partitionKeys; + private String inputFormat = HIVE_INPUT_FORMAT; + private String outputFormat = HIVE_OUTPUT_FORMAT; + private Map parameters = new HashMap<>(); + + private TableBuilder() { + dbName = null; + tableName = null; + } + + TableBuilder(String dbName, String tableName) { + this.dbName = dbName; + this.tableName = tableName; + } + + static Table buildDefaultTable(String dbName, String tableName) { + return new TableBuilder(dbName, tableName).build(); + } + + TableBuilder withType(TableType tabeType) { + this.tableType = tabeType; + return this; + } + + TableBuilder withOwner(String owner) { + this.owner = owner; + return this; + } + + TableBuilder withColumns(List columns) { + this.columns = columns; + return this; + } + + TableBuilder withPartitionKeys(List partitionKeys) { + this.partitionKeys = partitionKeys; + return this; + } + + TableBuilder withSerde(String serde) { + this.serde = serde; + return this; + } + + TableBuilder withInputFormat(String inputFormat) { + this.inputFormat = inputFormat; + return this; + } + + TableBuilder withOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; + return this; + } + + TableBuilder withParameter(String name, String value) { + parameters.put(name, value); + return this; + } + + TableBuilder withLocation(String location) { + this.location = location; + return this; + } + + Table build() { + StorageDescriptor sd = new StorageDescriptor(); + if (columns == null) { + sd.setCols(Collections.emptyList()); + } else { + sd.setCols(columns); + } + SerDeInfo serdeInfo = new SerDeInfo(); + serdeInfo.setSerializationLib(serde); + serdeInfo.setName(tableName); + sd.setSerdeInfo(serdeInfo); + sd.setInputFormat(inputFormat); + sd.setOutputFormat(outputFormat); + if (location != null) { + sd.setLocation(location); + } + + Table table = new Table(); + table.setDbName(dbName); + table.setTableName(tableName); + table.setSd(sd); + table.setParameters(parameters); + table.setOwner(owner); + if (partitionKeys != null) { + table.setPartitionKeys(partitionKeys); + } + table.setTableType(tableType.toString()); + return table; + } + } + + static class PartitionBuilder { + private final Table table; + private List values; + private String location; + private Map parameters = 
new HashMap<>(); + + private PartitionBuilder() { + table = null; + } + + PartitionBuilder(Table table) { + this.table = table; + } + + PartitionBuilder withValues(List values) { + this.values = new ArrayList<>(values); + return this; + } + + PartitionBuilder withLocation(String location) { + this.location = location; + return this; + } + + PartitionBuilder withParameter(String name, String value) { + parameters.put(name, value); + return this; + } + + Partition build() { + Partition partition = new Partition(); + List partitionNames = table.getPartitionKeys() + .stream() + .map(FieldSchema::getName) + .collect(Collectors.toList()); + if (partitionNames.size() != values.size()) { + throw new RuntimeException("Partition values do not match table schema"); + } + List spec = IntStream.range(0, values.size()) + .mapToObj(i -> partitionNames.get(i) + "=" + values.get(i)) + .collect(Collectors.toList()); + + partition.setDbName(table.getDbName()); + partition.setTableName(table.getTableName()); + partition.setParameters(parameters); + partition.setValues(values); + partition.setSd(table.getSd().deepCopy()); + if (this.location == null) { + partition.getSd().setLocation(table.getSd().getLocation() + "/" + Joiner.on("/").join(spec)); + } else { + partition.getSd().setLocation(location); + } + return partition; + } + } + + /** + * Create table schema from parameters. + * + * @param params list of parameters. Each parameter can be either a simple name or + * name:type for non-String types. + * @return table schema description + */ + static List createSchema(@Nullable List params) { + if (params == null || params.isEmpty()) { + return Collections.emptyList(); + } + + return params.stream() + .map(Util::param2Schema) + .collect(Collectors.toList()); + } + + /** + * Get server URI.

+ * <p>
+ * HMS host is obtained from
+ * <ol>
+ *   <li>Argument</li>
+ *   <li>HMS_HOST environment parameter</li>
+ *   <li>hms.host Java property</li>
+ *   <li>use 'localhost' if above fails</li>
+ * </ol>
+ * HMS Port is obtained from
+ * <ol>
+ *   <li>Argument</li>
+ *   <li>host:port string</li>
+ *   <li>HMS_PORT environment variable</li>
+ *   <li>hms.port Java property</li>
+ *   <li>default port value</li>
+ * </ol>
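+ * <p>
+ * For example (an illustrative resolution, assuming none of the environment
+ * variables or system properties above are set): {@code getServerUri(null, null)}
+ * yields {@code thrift://localhost:9083}.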
+ * + * @param host HMS host string. + * @param portString HMS port + * @return HMS URI + * @throws URISyntaxException + */ + static @Nullable URI getServerUri(@Nullable String host, @Nullable String portString) throws + URISyntaxException { + if (host == null) { + host = System.getenv(ENV_SERVER); + } + if (host == null) { + host = System.getProperty(PROP_HOST); + } + if (host == null) { + host = DEFAULT_HOST; + } + if (portString == null && !host.contains(":")) { + portString = System.getenv(ENV_PORT); + if (portString == null) { + portString = System.getProperty(PROP_PORT); + } + } + Integer port = DEFAULT_PORT; + if (portString != null) { + port = Integer.parseInt(portString); + } + + HostAndPort hp = HostAndPort.fromString(host) + .withDefaultPort(port); + + LOG.info("Connecting to {}:{}", hp.getHostText(), hp.getPort()); + + return new URI(THRIFT_SCHEMA, null, hp.getHostText(), hp.getPort(), + null, null, null); + } + + + private static FieldSchema param2Schema(@NotNull String param) { + String colType = DEFAULT_TYPE; + String name = param; + if (param.contains(TYPE_SEPARATOR)) { + String[] parts = param.split(TYPE_SEPARATOR); + name = parts[0]; + colType = parts[1].toLowerCase(); + } + return new FieldSchema(name, colType, ""); + } + + /** + * Create multiple partition objects. + * + * @param table + * @param arguments - list of partition names. + * @param npartitions + * @return + */ + static List createManyPartitions(@NotNull Table table, + @NotNull List arguments, + int npartitions) { + return IntStream.range(0, npartitions) + .mapToObj(i -> + new PartitionBuilder(table) + .withValues( + arguments.stream() + .map(a -> a + i) + .collect(Collectors.toList())).build()) + .collect(Collectors.toList()); + } + + static Object addManyPartitions(@NotNull HMSClient client, + @NotNull String dbName, + @NotNull String tableName, + @NotNull List arguments, + int npartitions) throws TException { + Table table = client.getTable(dbName, tableName); + client.addPartitions(createManyPartitions(table, arguments, npartitions)); + return null; + } + + static List generatePartitionNames(@NotNull String prefix, int npartitions) { + return IntStream.range(0, npartitions).mapToObj(i -> prefix+i).collect(Collectors.toList()); + } + + static void addManyPartitionsNoException(@NotNull HMSClient client, + @NotNull String dbName, + @NotNull String tableName, + List arguments, + int npartitions) { + throwingSupplierWrapper(() -> + addManyPartitions(client, dbName, tableName, arguments, npartitions)); + } + + static List filterMatches(@Nullable List candidates, + @Nullable List patterns) { + if (candidates == null || candidates.isEmpty()) { + return Collections.emptyList(); + } + if (patterns == null || patterns.isEmpty()) { + return candidates; + } + + Listpositive = positivePatterns(patterns); + Listnegative = negativePatterns(patterns); + + return candidates.stream() + .filter(c -> positive.isEmpty() || positive.stream().anyMatch(c::matches)) + .filter(c -> negative.isEmpty() || !negative.stream().anyMatch(c::matches)) + .collect(Collectors.toList()); + } + + /** + * Return list of positive patterns (not starting with bang) + * @param patterns + * @return + */ + private static List positivePatterns(@NotNull List<@NotNull String> patterns) { + return patterns.stream().filter(p -> !p.startsWith("!")).collect(Collectors.toList()); + } + + /** + * Return list of negative patterns (starting with bang + * @param patterns + * @return + */ + private static List negativePatterns(@NotNull List<@NotNull String> 
patterns) { + return patterns.stream() + .filter(p -> p.startsWith("!")) + .map(p -> p.substring(1)) + .collect(Collectors.toList()); + } + +} diff --git a/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java b/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java new file mode 100644 index 0000000000..ab4b62543f --- /dev/null +++ b/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/HMSClientTest.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.tools.HMSClient; +import org.apache.hadoop.hive.metastore.tools.Util; +import org.apache.thrift.TException; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; + +import java.util.Set; + +import static org.apache.hadoop.hive.metastore.tools.Util.getServerUri; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.equalToIgnoringCase; +import static org.hamcrest.Matchers.hasItem; +import static org.junit.Assume.assumeTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class HMSClientTest { + private static final String PARAM_NAME = "param"; + private static final String VALUE_NAME = "value"; + private static final String TEST_DATABASE="hmsClientTest"; + private static final String TEST_DATABASE_DESCRIPTION="hmsclienttest description"; + private static final ImmutableMap TEST_DATABASE_PARAMS = + new ImmutableMap.Builder() + .put(PARAM_NAME, VALUE_NAME) + .build(); + private static boolean hasClient = false; + + private static final String TEST_TABLE_NAME="test1"; + private static final Table TEST_TABLE = + Util.TableBuilder.buildDefaultTable(TEST_DATABASE, TEST_TABLE_NAME); + + private static HMSClient client = null; + + @BeforeAll + static void init() throws Exception { + Database db = new Util.DatabaseBuilder(TEST_DATABASE) + 
.withDescription(TEST_DATABASE_DESCRIPTION) + .withParams(TEST_DATABASE_PARAMS) + .build(); + // Create client and default test database + try { + client = + new HMSClient(getServerUri(null, null), null); + client.createDatabase(db); + } catch (Exception e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } + } + + @AfterAll + static void shutdown() throws TException { + if (client != null) { + // Destroy test database + client.dropDatabase(TEST_DATABASE); + } + } + + @Before + public void beforeTest() { + Assume.assumeTrue(client != null); + } + + /** + * Verify that list of databases contains "default" and test database + * @throws Exception + */ + @Test + public void getAllDatabases() throws Exception { + Set databases = client.getAllDatabases(null); + MatcherAssert.assertThat(databases, Matchers.hasItem("default")); + MatcherAssert.assertThat(databases, Matchers.hasItem(TEST_DATABASE.toLowerCase())); + assertThat(client.getAllDatabases(TEST_DATABASE.toLowerCase()), Matchers.contains(TEST_DATABASE.toLowerCase())); + } + + /** + * Verify that an attempt to create an existing database throws AlreadyExistsException. + */ + @Test + public void createExistingDatabase() { + Throwable exception = Assertions.assertThrows(AlreadyExistsException.class, + () -> client.createDatabase(TEST_DATABASE)); + } + + /** + * Creating a database with null name should not be allowed + * and should throw MetaException. + */ + @Test + public void createDatabaseNullName() { + Database db = new Util.DatabaseBuilder(TEST_DATABASE) + .build(); + db.setName(null); + Throwable exception = Assertions.assertThrows(MetaException.class, + () -> client.createDatabase(db)); + } + + /** + * Creating a database with an empty name should not be allowed + * and should throw InvalidObjectException + */ + @Test + public void createDatabaseEmptyName() { + Assume.assumeTrue(client != null); + Database db = new Util.DatabaseBuilder(TEST_DATABASE) + .build(); + db.setName(""); + Throwable exception = Assertions.assertThrows(InvalidObjectException.class, + () -> client.createDatabase(db)); + } + + /** + * Verify that getDatabase() returns all expected fields + * @throws TException if fails to get database info + */ + @Test + public void getDatabase() throws TException { + Database db = client.getDatabase(TEST_DATABASE); + MatcherAssert.assertThat(db.getName(), Matchers.equalToIgnoringCase(TEST_DATABASE)); + MatcherAssert.assertThat(db.getDescription(), Matchers.equalTo(TEST_DATABASE_DESCRIPTION)); + MatcherAssert.assertThat(db.getParameters(), Matchers.equalTo(TEST_DATABASE_PARAMS)); + MatcherAssert.assertThat(db.getLocationUri(), Matchers.containsString(TEST_DATABASE.toLowerCase())); + } + + /** + * Verify that locating database is case-insensitive + */ + @Test + public void getDatabaseCI() throws TException { + Database db = client.getDatabase(TEST_DATABASE.toUpperCase()); + MatcherAssert.assertThat(db.getName(), Matchers.equalToIgnoringCase(TEST_DATABASE)); + MatcherAssert.assertThat(db.getDescription(), Matchers.equalTo(TEST_DATABASE_DESCRIPTION)); + MatcherAssert.assertThat(db.getParameters(), Matchers.equalTo(TEST_DATABASE_PARAMS)); + MatcherAssert.assertThat(db.getLocationUri(), Matchers.containsString(TEST_DATABASE.toLowerCase())); + } + + /** + * Verify that searching for non-existing database throws + * NoSuchObjectException + */ + @Test + public void getNonExistingDb() { + Throwable exception = Assertions.assertThrows(NoSuchObjectException.class, + () -> client.getDatabase("WhatIsThisDatabase")); + } + + + 
/**
+   * Verify that dropping a non-existing database throws
+   * NoSuchObjectException.
+   */
+  @Test
+  public void dropNonExistingDb() {
+    Throwable exception = Assertions.assertThrows(NoSuchObjectException.class,
+        () -> client.dropDatabase("WhatIsThisDatabase"));
+  }
+
+  @Test
+  public void getAllTables() throws TException {
+    try {
+      client.createTable(TEST_TABLE);
+      assertThat(client.getAllTables(TEST_DATABASE, null), Matchers.contains(TEST_TABLE_NAME));
+    } catch (Exception e) {
+      System.out.println(e.getMessage());
+      e.printStackTrace();
+    } finally {
+      client.dropTable(TEST_DATABASE, TEST_TABLE_NAME);
+    }
+  }
+
+}
\ No newline at end of file
diff --git a/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java b/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java
new file mode 100644
index 0000000000..7e1b9dc7cd
--- /dev/null
+++ b/metastore-tools/tools-common/src/test/java/org/apache/hadoop/hive/metastore/tools/UtilTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.tools;
+
+import com.google.common.collect.ImmutableList;
+import org.hamcrest.MatcherAssert;
+import org.junit.Test;
+
+import java.util.Collections;
+import java.util.List;
+
+import static org.apache.hadoop.hive.metastore.tools.Util.filterMatches;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+
+public class UtilTest {
+
+  public UtilTest() {
+  }
+
+  /**
+   * Test that a null pattern returns all candidates for filterMatches.
+   * Also verify that null candidates result in an empty result list.
+   */
+  @Test
+  public void filterMatchesEmpty() {
+    List<String> candidates = ImmutableList.of("a", "b");
+    assertThat(filterMatches(candidates, null), is(candidates));
+    assertThat(filterMatches(null, candidates), is(Collections.emptyList()));
+  }
+
+  /**
+   * Test positive matches when some candidates match.
+   */
+  @Test
+  public void filterMatchesPositive() {
+    List<String> candidates = ImmutableList.of("a", "b");
+    List<String> expected = ImmutableList.of("a");
+    List<String> filtered = filterMatches(candidates, Collections.singletonList("a"));
+    MatcherAssert.assertThat(filtered, is(expected));
+  }
+
+  /**
+   * Test matches for negative notation (!something).
+   */
+  @Test
+  public void filterMatchesNegative() {
+    List<String> candidates = ImmutableList.of("a", "b");
+    List<String> expected = ImmutableList.of("a");
+    assertThat(filterMatches(candidates, Collections.singletonList("!b")), is(expected));
+  }
+
+  /**
+   * Test that multiple patterns are handled correctly. We use one positive and one
+   * negative pattern: {@code ^a} keeps names starting with 'a', while {@code !y$}
+   * then drops any of those ending in 'y'.
+   */
+  @Test
+  public void filterMatchesMultiple() {
+    List<String> candidates = ImmutableList.of("a", "b", "any", "boom", "hello");
+    List<String> patterns = ImmutableList.of("^a", "!y$");
+    List<String> expected = ImmutableList.of("a");
+    assertThat(filterMatches(candidates, patterns), is(expected));
+  }
+}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 5202248315..04f83d2c9a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -63,6 +63,7 @@
     <module>packaging</module>
     <module>standalone-metastore</module>
     <module>upgrade-acid</module>
+    <module>metastore-tools</module>
-- 
2.16.3