() {
+ @Override
+ public boolean hasNext() {
+ return outerIter.hasNext();
+ }
+
+ @Override
+ public Row next() {
+ String strRow = outerIter.next();
+ if (strRow != null) {
+ String[] strCols = strRow.split(delimiter);
+ Row row = builder.build();
+ for (int i = 0; i < row.size(); i++) {
+ row.get(i).fromString(strCols[i], nullIndicator);
+ }
+ // Add this to the rows so we don't have to do the conversion again if asked.
+ rows.add(row);
+ return row;
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public void remove() {
+ outerIter.remove();
+ }
+ };
+ }
+}
diff --git itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TestConf.java itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TestConf.java
new file mode 100644
index 0000000..cf82cf7
--- /dev/null
+++ itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TestConf.java
@@ -0,0 +1,268 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.junit.Assert;
+
+/**
+ * Configuration for integration tests. Configuration is done as a set of system properties.
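+ *
+ * For example, to run the tests over JDBC with Tez as the execution engine you could pass:
+ * mvn test -Dhive.test.capybara.access=jdbc -Dhive.test.capybara.engine=tez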
+ */
+public class TestConf {
+
+ /**
+ * Property that controls how Hive is accessed in the tests.
+ */
+ public static final String ACCESS_PROPERTY = "hive.test.capybara.access";
+
+ /**
+ * Value to set to access Hive via the CLI.
+ */
+ public static final String ACCESS_CLI = "cli";
+
+ /**
+ * Value to set to access Hive via JDBC
+ */
+ public static final String ACCESS_JDBC = "jdbc";
+
+ /**
+ * Default access method for the tests.
+ */
+ public static final String ACCESS_DEFAULT = ACCESS_CLI;
+
+ /**
+ * Determine whether this test is being executed via the CLI or JDBC
+ * @return cli or jdbc, depending on how this is being executed.
+ */
+ public static String access() {
+ return System.getProperty(ACCESS_PROPERTY, ACCESS_DEFAULT).toLowerCase();
+ }
+
+ @VisibleForTesting static void setAccess(String access) {
+ System.setProperty(ACCESS_PROPERTY, access);
+ }
+
+ /**
+ * Property that controls which engine is used to execute Hive queries.
+ */
+ public static final String ENGINE_PROPERTY = "hive.test.capybara.engine";
+
+ /**
+ * Value to set to execute Hive queries using Tez. (Currently local tests fail when set.)
+ */
+ public static final String ENGINE_TEZ = "tez";
+
+ /**
+ * Value to set to get the default engine from Hive (currently MapReduce).
+ */
+ public static final String ENGINE_UNSPECIFIED = "default";
+
+ /**
+ * Default engine.
+ */
+ public static final String ENGINE_DEFAULT = ENGINE_UNSPECIFIED;
+
+ /**
+ * Determine execution engine for this test
+ * @return default, tez or spark
+ */
+ public static String engine() {
+ return System.getProperty(ENGINE_PROPERTY, ENGINE_DEFAULT).toLowerCase();
+ }
+
+ @VisibleForTesting
+ static void setEngine(String engine) {
+ System.setProperty(ENGINE_PROPERTY, engine);
+ }
+
+ /**
+ * Property to set to control how many tasks Tez runs. This only controls Tez in the local
+ * (mini-cluster) case. Tez on the cluster will be controlled by the configuration of the cluster.
+ */
+ public static final String TEZ_NUM_TASKS_PROPERTY = "hive.test.capybara.tez.num.tasks";
+
+ /**
+ * Default number of Tez tasks when run in the minicluster.
+ */
+ public static final String TEZ_NUM_TASKS_DEFAULT = "2";
+
+ /**
+ * Determine number of tasks to start in MiniTezCluster
+ * @return number of tasks
+ */
+ static int numTezTasks() {
+ return Integer.valueOf(System.getProperty(TEZ_NUM_TASKS_PROPERTY, TEZ_NUM_TASKS_DEFAULT));
+ }
+
+ /**
+ * Property to set to control which file format Hive uses by default.
+ */
+ public static final String FILE_FORMAT_PROPERTY = "hive.test.capybara.file.format";
+
+ /**
+ * Value to set to use ORC as the default file format.
+ */
+ public static final String FILE_FORMAT_ORC = "ORC";
+
+ /**
+ * Value to set to use Sequence as the default file format.
+ */
+ public static final String FILE_FORMAT_SEQUENCE = "SequenceFile";
+
+ /**
+ * Value to set to use Text as the default file format.
+ */
+ public static final String FILE_FORMAT_TEXT = "TextFile";
+
+ /**
+ * Value to set to use RCFile as the default file format.
+ */
+ public static final String FILE_FORMAT_RCFILE = "RCfile";
+
+ /**
+ * Default value to use for file format.
+ */
+ public static final String FILE_FORMAT_DEFAULT = FILE_FORMAT_ORC;
+
+ /**
+ * Determine default storage format for this test
+ * @return ORC, SequenceFile, TextFile, or RCfile
+ */
+ public static String fileFormat() {
+ String format = System.getProperty(FILE_FORMAT_PROPERTY, FILE_FORMAT_DEFAULT);
+ // Validator.validate returns null if the value is valid, or an error string if there's an
+ // issue.
+ Assert.assertNull(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT.getValidator().validate(format));
+ return format;
+ }
+
+ /**
+ * Property to set to control whether these tests use a secure cluster.
+ */
+ public static final String SECURITY_PROPERTY = "hive.test.capybara.security";
+
+ /**
+ * Value for running with security off.
+ */
+ public static final String SECURITY_NONSECURE = "nonsecure";
+
+ /**
+ * Default setting for security.
+ */
+ public static final String SECURITY_DEFAULT = SECURITY_NONSECURE;
+
+ /**
+ * Determine whether this test is being executed in secure or non-secure mode
+ * @return secure or nonsecure
+ */
+ public static String security() {
+ return System.getProperty(SECURITY_PROPERTY, SECURITY_DEFAULT).toLowerCase();
+ }
+
+ /**
+ * Property to set to control which metastore implementation Hive uses. This will only affect
+ * the minicluster setup, as the metastore on a real cluster will be controlled by the cluster.
+ */
+ public static final String METASTORE_PROPERTY = "hive.test.capybara.metastore";
+
+ /**
+ * Value for running with a RDBMS metastore.
+ */
+ public static final String METASTORE_RDBMS = "rdbms";
+
+ /**
+ * Default value for the metastore.
+ */
+ public static final String METASTORE_DEFAULT = METASTORE_RDBMS;
+
+ /**
+ * Determine which metastore implementation to use.
+ * @return rdbms or hbase.
+ */
+ public static String metastore() {
+ return System.getProperty(METASTORE_PROPERTY, METASTORE_DEFAULT).toLowerCase();
+ }
+
+ /**
+ * Property to set to control whether tests run locally in a minicluster or on a real cluster.
+ * The values are boolean. If set to true, you must also provide values for HADOOP_HOME and
+ * HIVE_HOME via system properties (e.g. add -DHADOOP_HOME=/cluster/test/hadoop to your command
+ * line).
+ */
+ public static final String USE_CLUSTER_PROPERTY = "hive.test.capybara.use.cluster";
+
+ /**
+ * Default value for running on a cluster.
+ */
+ public static final String USE_CLUSTER_DEFAULT = "false";
+
+ /**
+ * Determine whether the tests should run on a cluster.
+ * @return true if they should run on a cluster.
+ */
+ public static boolean onCluster() {
+ return Boolean.valueOf(System.getProperty(USE_CLUSTER_PROPERTY, USE_CLUSTER_DEFAULT));
+ }
+
+ /**
+ * Property to set to control the scale the tests run at. The unit of scale is a kilobyte.
+ */
+ public static final String SCALE_PROPERTY = "hive.test.capybara.scale";
+
+ /**
+ * Default value for the scale. This is set for the local case and should definitely be set
+ * higher if being run on a cluster.
+ */
+ public static final String SCALE_DEFAULT = "1";
+
+ public static int getScale() {
+ return Integer.valueOf(System.getProperty(SCALE_PROPERTY, SCALE_DEFAULT));
+ }
+
+ /**
+ * Property to set to control when generated data is spilled to disk. In bytes.
+ */
+ public static final String SPILL_SIZE_PROPERTY = "hive.test.capybara.data.spill.size";
+
+ /**
+ * Default spill size.
+ */
+ public static final String SPILL_SIZE_DEFAULT = Integer.toString(1024 * 1024 * 256);
+
+ public static int getSpillSize() {
+ // Keep in mind that twice the spill size may be in memory at a time as it will be spilling
+ // one batch while it is generating the next.
+ return Integer.valueOf(System.getProperty(SPILL_SIZE_PROPERTY, SPILL_SIZE_DEFAULT));
+ }
+
+ /**
+ * Property to set to control when data is generated on the cluster instead of on the local
+ * machine. This only applies when running tests on the cluster.
+ */
+ public static final String CLUSTERGEN_SIZE_PROPERTY = "hive.test.capybara.data.clustergen.threshold";
+
+ /**
+ * Default size to switch to generating data on the cluster. In bytes.
+ */
+ public static final String CLUSTERGEN_SIZE_DEFAULT = Integer.toString(1024 * 1024 * 1024);
+
+ public static int getClusterGenThreshold() {
+ return Integer.valueOf(System.getProperty(CLUSTERGEN_SIZE_PROPERTY, CLUSTERGEN_SIZE_DEFAULT));
+ }
+}
diff --git itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TestManager.java itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TestManager.java
new file mode 100644
index 0000000..89d3ab5
--- /dev/null
+++ itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TestManager.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hive.test.capybara.iface.Benchmark;
+import org.apache.hive.test.capybara.iface.ClusterManager;
+
+/**
+ * Manage all aspects of the test. This is the glue that holds all the pieces together. The two
+ * most important pieces here are the ClusterManager, which contains references to the cluster
+ * being run on, and Benchmark, which references the generator of expected results for the tests.
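+ *
+ * A minimal sketch of typical usage:
+ *   TestManager mgr = TestManager.getTestManager();
+ *   ClusterManager cluster = mgr.getClusterManager();
+ *   Benchmark bench = mgr.getBenchmark();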
+ */
+public class TestManager implements Configurable {
+
+ private static TestManager self = null;
+
+ private Configuration conf;
+ private ClusterManager cluster;
+ private Benchmark bench;
+ private Benchmark oneTimeBench;
+
+ public static TestManager getTestManager() {
+ if (self == null) {
+ self = new TestManager();
+ }
+ return self;
+ }
+
+ private TestManager() {
+
+ }
+
+ /**
+ * Get the cluster manager for this test.
+ * @return cluster manager
+ */
+ public ClusterManager getClusterManager() {
+ if (cluster == null) {
+ cluster = TestConf.onCluster() ? new ExternalClusterManager() : new MiniClusterManager();
+ cluster.setConf(conf);
+ }
+ return cluster;
+ }
+
+ /**
+ * Get the benchmark for this test.
+ * @return benchmark
+ */
+ public Benchmark getBenchmark() {
+ if (oneTimeBench != null) return oneTimeBench;
+ if (bench == null) {
+ bench = TestConf.onCluster() ? new PostgresBenchmark() : new DerbyBenchmark();
+ }
+ return bench;
+ }
+
+ /**
+ * Set up a special Benchmark for this test. This gives the user an opportunity to inject a
+ * special Benchmark for a particular test, rather than using whatever is standard for the
+ * current configuration. This will be reset at the end of the test.
+ * @param bench special Benchmark to use
+ */
+ public void setOneTimeBenchmark(Benchmark bench) {
+ oneTimeBench = bench;
+ }
+
+ /**
+ * Reset the Benchmark to the standard for the current configuration. This will be called at
+ * the end of each test by the system.
+ */
+ public void resetBenchmark() {
+ oneTimeBench = null;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @VisibleForTesting
+ void setClusterManager(ClusterManager clusterMgr) {
+ cluster = clusterMgr;
+ }
+}
diff --git itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TranslationException.java itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TranslationException.java
new file mode 100644
index 0000000..0960aa0
--- /dev/null
+++ itests/capybara/src/main/java/org/apache/hive/test/capybara/infra/TranslationException.java
@@ -0,0 +1,24 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.hive.test.capybara.infra;
+
+class TranslationException extends Exception {
+ TranslationException(String section, String hiveSql) {
+ super("Could not translate " + section + ", Hive SQL: <" + hiveSql + ">");
+ }
+}
diff --git itests/capybara/src/main/java/org/apache/hive/test/capybara/package-info.java itests/capybara/src/main/java/org/apache/hive/test/capybara/package-info.java
new file mode 100644
index 0000000..ffdfd49
--- /dev/null
+++ itests/capybara/src/main/java/org/apache/hive/test/capybara/package-info.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Capybara is an integration test framework for Hive. It has several goals:
+ *
+ * - Separate writing and running tests from permutations of features in Hive. That is, one
+ * test should be able to run in beeline, CLI, and JDBC, with ORC, Parquet, RCFile, etc., with
+ * Tez or Spark as an engine, etc.
+ * - The same tests should run locally on developer machines and against a cluster. These
+ * tests should be able to automatically scale as appropriate to their location. This allows
+ * simple testing for devs on their boxes with a few KB of data and for users on their clusters
+ * with GBs or TBs of data.
+ * - Expected results of the queries should be auto-generated by the system. It's unrealistic
+ * to rely on developers eyeballing results.
+ * - Access to the query plan should be provided in code, rather than requiring string compares
+ * to see if the output of explain looks the same today as it did yesterday.
+ * - The tool should leverage JUnit and Maven. It should be in Java so that developers can
+ * test a variety of scenarios beyond just SQL queries.
+ * - The tool should be able to simulate user data. Given a user's queries and access to the
+ * user's tables, it should generate data resembling the user's data and then build tests for
+ * the user's queries.
+ *
+ * Capybara works by managing an instance of Hive and a Benchmark. When running locally the
+ * Hive instance will be run in process using a DFSMiniCluster (and other mini-clusters as
+ * appropriate). When running on a cluster, capybara connects to Hive via the CLI or
+ * JDBC (depending on how the system is configured). The Benchmark usually uses an RDBMS (by
+ * default Derby locally and Postgres on the cluster) to run the same query and compare the
+ * results. To some extent the system can smooth over the differences between Hive SQL and ANSI
+ * standard SQL. If you need to run a completely different query against the benchmark (say
+ * you're testing a UDF not supported in the Benchmark) then you can run separate queries against
+ * Hive and the Benchmark. For extreme cases you can also provide your own Benchmark
+ * implementation.
+ *
+ *
+ * To use the framework, create a JUnit4 test (i.e. one that uses @Test) and have it extend
+ * {@link org.apache.hive.test.capybara.IntegrationTest}. IntegrationTest handles setup and
+ * teardown of cluster and benchmark resources.
+ *
+ * {@link org.apache.hive.test.capybara.TableTool}
+ * provides methods to build a number of commonly used test tables. You can also use
+ * {@link org.apache.hive.test.capybara.iface.TestTable} to build your own table and populate it.
+ * The system keeps track of tables created and populated in both Hive and the Benchmark and will
+ * not re-create the tables if they already exist. It will detect changes in scale, file format,
+ * etc. that will require a re-creation and then handle dropping and re-creating the table.
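+ *
+ * As a minimal sketch (the table and column names here are purely illustrative):
+ *   TestTable tTable = TestTable.getBuilder("mytable")
+ *       .addCol("a", "varchar(10)")
+ *       .addCol("b", "int")
+ *       .build();
+ *   tTable.create();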
+ *
+ * Once your tables are created and populated you can run queries against them using
+ * {@link org.apache.hive.test.capybara.IntegrationTest#runQuery(String)} and related methods.
+ * You can run any number of queries desired in the test. All operations will be run against
+ * both Hive and the Benchmark. If you need to set any configuration values, this should be done
+ * via calls to {@link org.apache.hive.test.capybara.IntegrationTest#set}. The configuration is
+ * reset for each test so that set calls from one test do not affect any other tests. If you
+ * have a set of values you would like to have set for all tests in a file you can do that in a
+ * @Before method.
+ *
+ *
+ * Some features require a set of configuration values to be set. Rather than requiring test
+ * writers to set these up each time, annotations are provided that will tell the system to set
+ * the appropriate configuration values for a test. For example, all of the values for testing
+ * SQL Standard Authorization can be turned on by annotating a test with @SqlStdAuthOn. See the
+ * {@link org.apache.hive.test.capybara.annotations} package for a full list.
+ *
+ * Once you have produced a result you would like to compare you can use one of the
+ * comparison functions to check your results. For select queries
+ * {@link org.apache.hive.test.capybara.IntegrationTest#compare} will compare results in the
+ * order they are returned. This should only be used if you expect your data to be sorted.
+ * {@link org.apache.hive.test.capybara.IntegrationTest#sortAndCompare} will sort results and
+ * compare them. {@link org.apache.hive.test.capybara.IntegrationTest#tableCompare} will
+ * compare entries in two tables. This is useful for insert queries.
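+ *
+ * Putting these pieces together, a minimal test might look like this (the query and table
+ * are illustrative):
+ *   public class ExampleTest extends IntegrationTest {
+ *     @Test
+ *     public void simpleSelect() throws Exception {
+ *       runQuery("select a, b from mytable");
+ *       sortAndCompare();
+ *     }
+ *   }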
+ *
+ * The features being tested (e.g. which file format is used for tables, which
+ * execution engine, how Hive is accessed, etc.) are controlled by system properties. You can
+ * find the system properties to set as well as default values by looking in
+ * {@link org.apache.hive.test.capybara.infra.TestConf}. To change these values you pass
+ * properties to JUnit as part of your maven build command. For example, to use Tez as your
+ * execution engine instead of the default (currently unspecified, which means Hive's local mode
+ * on your machine and the cluster default on your cluster), you would give a command like:
+ * mvn test -Dtest=ExampleTest -Dhive.test.capybara.engine=tez
+ *
+ *
+ * When running on a cluster you must tell capybara explicitly that it is on a cluster, and
+ * where to find configuration information for that cluster. The system property to tell it to
+ * run on the cluster is hive.test.capybara.use.cluster. To tell it where to find
+ * Hadoop you need to define the property HADOOP_HOME. To find Hive you need to set
+ * the property HIVE_HOME. If your postgres database is password protected, you can
+ * pass that via the property hive.test.capybara.postgres.password. So to run
+ * ExampleTest on a cluster the command is
+ * mvn test -Dtest=ExampleTest -Dhive.test.capybara.postgres.password=yourpasswd
+ * -Dhive.test.capybara.use.cluster=true -DHADOOP_HOME=hadoop_location
+ * -DHIVE_HOME=hive_location
+ *
+ *
+ * Some tests do not make sense in some contexts. For example, currently ACID features are
+ * only supported when using ORC file format. Therefore tests making use of ACID features
+ * should not be run when the file format being tested is anything other than ORC. To control
+ * this you can annotate your tests to indicate when they should not be run. In the ACID case
+ * you would mark it with @NoParquet, @NoTextFile, and @NoRcFile. You can see the complete list of
+ * annotations in {@link org.apache.hive.test.capybara.annotations}.
+ *
+ */
+package org.apache.hive.test.capybara;
\ No newline at end of file
diff --git itests/capybara/src/main/java/org/apache/hive/test/capybara/tools/UserQueryGenerator.java itests/capybara/src/main/java/org/apache/hive/test/capybara/tools/UserQueryGenerator.java
new file mode 100644
index 0000000..69e44b9
--- /dev/null
+++ itests/capybara/src/main/java/org/apache/hive/test/capybara/tools/UserQueryGenerator.java
@@ -0,0 +1,715 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.tools;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.thrift.TException;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A tool to collect information on tables based on user queries and then write code to test
+ * those queries.
+ *
+ * To use this tool:
+ *
+ * - If this is the first time you have used the tool, in your git directory do cd
+ * itests/capybara; mvn package, then copy target/capybara-test-<version>.jar to
+ * $HIVE_HOME/lib/ and itests/capybara/src/scripts/capygen.sh to $HIVE_HOME/bin/ext.
+ * - For each test you wish to run, write all of the SQL for that test into a file, one
+ * line for each SQL command. Thus if you have 5 tests to run, you will have 5 separate
+ * files.
+ * - Run analyze table yourtable compute statistics for columns; for
+ * each table referenced in the queries you wish to test.
+ * - On a machine that has access to the metastore for your cluster run this class, passing
+ * in each of your input files and the name of the Java class for your test. The command
+ * runs as a Hive service. If you had two input files, test.sql and
+ * anothertest.sql and you wanted to create a class called TestMyApp the command line
+ * would be $HIVE_HOME/bin/hive --service capygen -i test.sql anothertest.sql -o TestMyApp
+ *
+ * - The resulting class will be in the package org.apache.hive.test.capybara.generated.
+ * You can put it in the appropriate directory in your source code. At this point doing
+ * cd itests/capybara; mvn test will run your test(s). If you like you can change the
+ * package the test is in, though you will then also need to put it in a place with an
+ * appropriate pom file and infrastructure to run the tests.
+ * These tests should not be pushed back to Apache as they will have your queries in them.
+ *
+ *
+ */
+public class UserQueryGenerator {
+ static final private Logger LOG = LoggerFactory.getLogger(UserQueryGenerator.class);
+
+ private BufferedWriter writer;
+ private int indent;
+
+ public static void main(String[] args) {
+
+ Options options = new Options();
+
+ options.addOption(OptionBuilder
+ .withLongOpt("help")
+ .withDescription("You're looking at it")
+ .create('h'));
+
+ options.addOption(OptionBuilder
+ .withLongOpt("input")
+ .withDescription("Input files")
+ .hasArgs()
+ .isRequired()
+ .create('i'));
+
+ options.addOption(OptionBuilder
+ .withLongOpt("output")
+ .withDescription("Output class")
+ .hasArg()
+ .isRequired()
+ .create('o'));
+
+ try {
+ CommandLine cli = new GnuParser().parse(options, args);
+
+ if (cli.hasOption('h')) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("userquerygenerator", options);
+ }
+
+ UserQueryGenerator gen = new UserQueryGenerator();
+ gen.run(cli.getOptionValues('i'), cli.getOptionValue('o'));
+ } catch (ParseException|TException|IOException e) {
+ System.err.println("Failed to run, " + e.getMessage());
+ LOG.error("Failure", e);
+ }
+ }
+
+ private UserQueryGenerator() {
+
+ }
+
+ private void run(String[] inputFiles, String outputClass) throws IOException, TException {
+ Map<String, List<SQLStatement>> tests = readStatementsFromFile(inputFiles);
+ Set<TableInfo> tables = determineTables(tests);
+ getTableInfo(tables);
+ writeCode(tests, tables, outputClass);
+ }
+
+ private Map<String, List<SQLStatement>> readStatementsFromFile(String[] filenames) throws IOException {
+ Map<String, List<SQLStatement>> files = new HashMap<>();
+ for (String filename : filenames) {
+ List<SQLStatement> stmts = new ArrayList<>();
+ BufferedReader reader = new BufferedReader(new FileReader(filename));
+ String line;
+ List<String> statement = new ArrayList<>();
+ while ((line = reader.readLine()) != null) {
+ LOG.info("Evaluating line: " + line);
+ if (statement.size() == 0 && line.toLowerCase().matches(".*;\\s*")) {
+ // It's a statement on one line
+ stmts.add(new SQLStatement(Arrays.asList(line)));
+ LOG.info("Found one line query <" + line + ">");
+ } else if (statement.size() == 0 && line.toLowerCase().matches(".*\\S+.*")) {
+ LOG.info("Starting new statement");
+ statement.add(line);
+ } else if (statement.size() > 0) {
+ statement.add(line);
+ LOG.info("Appending " + line + " to existing statement");
+ if (line.matches(".*;\\s*")) {
+ SQLStatement stmt = new SQLStatement(statement);
+ stmts.add(stmt);
+ LOG.info("Found multi-line query <" + stmt.toString() + ">");
+ statement = new ArrayList<>();
+ }
+ }
+ }
+ reader.close();
+ if (files.put(cleanFilename(filename), stmts) != null) {
+ throw new IOException("Two files both map to " + cleanFilename(filename));
+ }
+ }
+ return files;
+ }
+
+ private String cleanFilename(String original) {
+ if (original.contains(System.getProperty("file.separator"))) {
+ original = original.substring(original.lastIndexOf(System.getProperty("file.separator")) + 1);
+ }
+ return original.replace(".sql", "").replaceAll("[^a-zA-Z0-9_]", "_");
+ }
+
+ private Set<TableInfo> determineTables(Map<String, List<SQLStatement>> statements) {
+ Set<TableInfo> tables = new HashSet<>();
+
+ // Look for the table name at the head of the block.
+ // Each of these should either have a table name or possibly a subquery. Only worry about
+ // the table names
+ // TODO - doesn't handle from a, b
+ // TODO - doesn't handle use db
+ Pattern fromAndJoin = Pattern.compile("([A-Za-z0-9_\\.]+).*");
+ Pattern insert = Pattern.compile("insert\\s+into\\s+([A-Za-z0-9_\\.]+).*");
+ Pattern insertTable = Pattern.compile("insert\\s+into\\s+table\\s+([A-Za-z0-9_\\.]+).*");
+ Pattern insertOverwrite = Pattern.compile("insert\\s+overwrite\\s+table\\s+([A-Za-z0-9_\\.]+).*");
+ Pattern update = Pattern.compile("update\\s+([A-Za-z0-9_\\.]+).*");
+ Pattern delete = Pattern.compile("delete\\s+from\\s+([A-Za-z0-9_\\.]+).*");
+ for (List<SQLStatement> stmts : statements.values()) {
+ for (SQLStatement stmt : stmts) {
+ if (stmt.toString().startsWith("set")) continue;
+ findTableName(fromAndjoin, stmt.toString().split("\\s*from\\s*"), stmt, tables, true);
+ findTableName(fromAndjoin, stmt.toString().split("\\s*join\\s*"), stmt, tables, true);
+ findTableName(insert, new String[]{"", stmt.toString()}, stmt, tables, false);
+ findTableName(insertTable, new String[]{"", stmt.toString()}, stmt, tables, false);
+ findTableName(insertOverwrite, new String[]{"", stmt.toString()}, stmt, tables, false);
+ findTableName(update, new String[]{"", stmt.toString()}, stmt, tables, false);
+ findTableName(delete, new String[]{"", stmt.toString()}, stmt, tables, false);
+ }
+ }
+ return tables;
+ }
+
+ private void findTableName(Pattern pattern, String[] segments, SQLStatement stmt,
+ Set<TableInfo> tables, boolean needsPopulated) {
+ // Skip the first segment since it will be the part before 'from' or 'join'
+ for (int i = 1; i < segments.length; i++) {
+ Matcher matcher = pattern.matcher(segments[i]);
+ if (matcher.matches()) {
+ String tableName = matcher.group(1);
+ LOG.info("Found table " + tableName + ", needsPopulated is " + needsPopulated);
+ TableInfo ti = new TableInfo(needsPopulated, tableName);
+ if (!needsPopulated) stmt.targetTable = ti;
+ tables.add(ti);
+ }
+ }
+ }
+
+ private void getTableInfo(Set<TableInfo> tables) throws TException {
+ HiveConf conf = new HiveConf();
+ IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
+ for (TableInfo table : tables) {
+ String dbName, tableOnlyName;
+ if (table.nameFromStatement.contains(".")) {
+ String[] compoundName = table.nameFromStatement.split("\\.");
+ dbName = compoundName[0];
+ tableOnlyName = compoundName[1];
+ } else {
+ dbName = "default";
+ tableOnlyName = table.nameFromStatement;
+ }
+
+ try {
+ table.msTable = msClient.getTable(dbName, tableOnlyName);
+ } catch (NoSuchObjectException e) {
+ // This might be ok, because the table might be created as part of the test.
+ LOG.warn("Failed to find table " + dbName + "." + tableOnlyName);
+ continue;
+ }
+
+ List<String> colNames = Lists.transform(table.msTable.getSd().getCols(),
+ new Function<FieldSchema, String>() {
+ @Override
+ public String apply(FieldSchema input) {
+ return input.getName();
+ }
+ });
+
+ if (table.msTable.getPartitionKeys() != null && table.msTable.getPartitionKeys().size() > 0) {
+ List<String> partNames = msClient.listPartitionNames(dbName, tableOnlyName, (short)-1);
+ LOG.info("Found partitions: " + StringUtils.join(partNames, ","));
+ table.numParts = partNames.size();
+ Map<String, List<ColumnStatisticsObj>> partStats = msClient.getPartitionColumnStatistics(dbName,
+ tableOnlyName, partNames, colNames);
+ table.colStats = combineColStats(partStats);
+ // To get the sizes we need to fetch individual partitions and look at the aggregate stats.
+ // We don't want to fetch all of them at once for fear of blowing out the metastore's
+ // memory, so we fetch them a thousand at a time.
+ List<List<String>> partitionGroups = new ArrayList<>();
+ int numPartsInGroup = 0;
+ List<String> currentGroup = null;
+ for (String partName : partNames) {
+ if (numPartsInGroup % 1000 == 0) {
+ if (currentGroup != null) {
+ partitionGroups.add(currentGroup);
+ }
+ currentGroup = new ArrayList<>(1000);
+ }
+ currentGroup.add(partName);
+ numPartsInGroup++;
+ }
+ // Add the last group
+ partitionGroups.add(currentGroup);
+
+ for (List<String> oneGroup : partitionGroups) {
+ LOG.info("Fetching stats for partitions: " + StringUtils.join(oneGroup, ","));
+ List<Partition> parts = msClient.getPartitionsByNames(dbName, tableOnlyName, oneGroup);
+ LOG.info("Got " + parts.size() + " partitions");
+ for (Partition part : parts) {
+ table.dataSize += Long.valueOf(part.getParameters().get(StatsSetupConst.RAW_DATA_SIZE));
+ table.rowCount += Long.valueOf(part.getParameters().get(StatsSetupConst.ROW_COUNT));
+ }
+ }
+ } else {
+ table.numParts = 0;
+ table.colStats = msClient.getTableColumnStatistics(dbName, tableOnlyName, colNames);
+ table.dataSize = Long.valueOf(table.msTable.getParameters().get(StatsSetupConst.RAW_DATA_SIZE));
+ table.rowCount = Long.valueOf(table.msTable.getParameters().get(StatsSetupConst.ROW_COUNT));
+ }
+ }
+ }
+
+ private List<ColumnStatisticsObj> combineColStats(Map<String, List<ColumnStatisticsObj>> partStats) {
+ Map<String, ColumnStatisticsObj> colToStatsMap = new HashMap<>();
+
+ for (List<ColumnStatisticsObj> statsObjs : partStats.values()) {
+ for (ColumnStatisticsObj latest : statsObjs) {
+ ColumnStatisticsObj existing = colToStatsMap.get(latest.getColName());
+ colToStatsMap.put(latest.getColName(), combineTwoStats(existing, latest));
+ }
+ }
+ return new ArrayList<>(colToStatsMap.values());
+ }
+
+ private ColumnStatisticsObj combineTwoStats(ColumnStatisticsObj existing,
+ ColumnStatisticsObj latest) {
+ if (existing == null) return latest;
+
+ ColumnStatisticsData newStatsData = new ColumnStatisticsData();
+ if (existing.getStatsData().isSetLongStats()) {
+ LongColumnStatsData existingStats = existing.getStatsData().getLongStats();
+ LongColumnStatsData latestStats = latest.getStatsData().getLongStats();
+ LongColumnStatsData newStats =
+ new LongColumnStatsData(existingStats.getNumNulls() + latestStats.getNumNulls(),
+ (long)(Math.max(existingStats.getNumDVs(), latestStats.getNumDVs()) * 1.2));
+ newStats.setHighValue(Math.max(existingStats.getHighValue(), latestStats.getHighValue()));
+ newStats.setLowValue(Math.min(existingStats.getLowValue(), latestStats.getLowValue()));
+ newStatsData.setLongStats(newStats);
+ } else if (existing.getStatsData().isSetDoubleStats()) {
+ DoubleColumnStatsData existingStats = existing.getStatsData().getDoubleStats();
+ DoubleColumnStatsData latestStats = latest.getStatsData().getDoubleStats();
+ DoubleColumnStatsData newStats =
+ new DoubleColumnStatsData(existingStats.getNumNulls() + latestStats.getNumNulls(),
+ (long)(Math.max(existingStats.getNumDVs(), latestStats.getNumDVs()) * 1.2));
+ newStats.setHighValue(Math.max(existingStats.getHighValue(), latestStats.getHighValue()));
+ newStats.setLowValue(Math.min(existingStats.getLowValue(), latestStats.getLowValue()));
+ newStatsData.setDoubleStats(newStats);
+ } else if (existing.getStatsData().isSetDecimalStats()) {
+ DecimalColumnStatsData existingStats = existing.getStatsData().getDecimalStats();
+ DecimalColumnStatsData latestStats = latest.getStatsData().getDecimalStats();
+ DecimalColumnStatsData newStats =
+ new DecimalColumnStatsData(existingStats.getNumNulls() + latestStats.getNumNulls(),
+ (long) (Math.max(existingStats.getNumDVs(), latestStats.getNumDVs()) * 1.2));
+ newStats.setHighValue(existingStats.getHighValue().compareTo(latestStats.getHighValue()) > 0 ?
+ existingStats.getHighValue() : latestStats.getHighValue());
+ newStats.setLowValue(existingStats.getLowValue().compareTo(latestStats.getLowValue()) < 0 ?
+ existingStats.getLowValue() : latestStats.getLowValue());
+ newStatsData.setDecimalStats(newStats);
+ } else if (existing.getStatsData().isSetDateStats()) {
+ DateColumnStatsData existingStats = existing.getStatsData().getDateStats();
+ DateColumnStatsData latestStats = latest.getStatsData().getDateStats();
+ DateColumnStatsData newStats =
+ new DateColumnStatsData(existingStats.getNumNulls() + latestStats.getNumNulls(),
+ (long) (Math.max(existingStats.getNumDVs(), latestStats.getNumDVs()) * 1.2));
+ newStats.setHighValue(existingStats.getHighValue().compareTo(latestStats.getHighValue()) > 0 ?
+ existingStats.getHighValue() : latestStats.getHighValue());
+ newStats.setLowValue(existingStats.getLowValue().compareTo(latestStats.getLowValue()) < 0 ?
+ existingStats.getLowValue() : latestStats.getLowValue());
+ newStatsData.setDateStats(newStats);
+ } else if (existing.getStatsData().isSetBooleanStats()) {
+ BooleanColumnStatsData existingStats = existing.getStatsData().getBooleanStats();
+ BooleanColumnStatsData latestStats = latest.getStatsData().getBooleanStats();
+ BooleanColumnStatsData newStats =
+ new BooleanColumnStatsData( existingStats.getNumTrues() + latestStats.getNumTrues(),
+ existingStats.getNumFalses() + latestStats.getNumFalses(),
+ existingStats.getNumNulls() + latestStats.getNumNulls());
+ newStatsData.setBooleanStats(newStats);
+ } else if (existing.getStatsData().isSetStringStats()) {
+ StringColumnStatsData existingStats = existing.getStatsData().getStringStats();
+ StringColumnStatsData latestStats = latest.getStatsData().getStringStats();
+ StringColumnStatsData newStats =
+ new StringColumnStatsData(
+ Math.max(existingStats.getMaxColLen(), latestStats.getMaxColLen()),
+ // TODO improve this
+ (existingStats.getAvgColLen() + latestStats.getAvgColLen()) / 2.0,
+ existingStats.getNumNulls() + latestStats.getNumNulls(),
+ (long) (Math.max(existingStats.getNumDVs(), latestStats.getNumDVs()) * 1.2));
+ newStatsData.setStringStats(newStats);
+ } else if (existing.getStatsData().isSetBinaryStats()) {
+ BinaryColumnStatsData existingStats = existing.getStatsData().getBinaryStats();
+ BinaryColumnStatsData latestStats = latest.getStatsData().getBinaryStats();
+ BinaryColumnStatsData newStats =
+ new BinaryColumnStatsData(
+ Math.max(existingStats.getMaxColLen(), latestStats.getMaxColLen()),
+ // TODO improve this
+ (existingStats.getAvgColLen() + latestStats.getAvgColLen()) / 2.0,
+ existingStats.getNumNulls() + latestStats.getNumNulls());
+ newStatsData.setBinaryStats(newStats);
+ }
+
+ return new ColumnStatisticsObj(latest.getColName(), latest.getColType(), newStatsData);
+ }
+
+ private void writeCode(Map<String, List<SQLStatement>> statements, Set<TableInfo> tables,
+ String outputClass)
+ throws IOException {
+ writer = new BufferedWriter(new FileWriter(outputClass + ".java"));
+ indent = 0;
+ for (String line : prologue) writeALine(line);
+ writeALine("public class " + outputClass + " extends IntegrationTest {");
+ indent++;
+
+ // Create set of tables so we can fetch them in the tests if we need them.
+ writeALine("private Map targetTables = new HashMap<>();");
+
+ // Write the methods that build the tables, marking each with @Before
+ for (TableInfo info : tables) writeBefore(info);
+
+ // Write the tests, naming them with the filenames
+ for (Map.Entry<String, List<SQLStatement>> entry : statements.entrySet()) {
+ writeTest(entry.getKey(), entry.getValue());
+ }
+
+ indent--;
+ writeALine("}");
+ writer.close();
+ }
+
+ private void writeBefore(TableInfo info) throws IOException {
+ if (info.msTable == null) {
+ // If we don't have info from the metastore on the table we can't build it. We will assume
+ // it will be created somewhere as part of the test.
+ LOG.info("No metastore info for table " + info.nameFromStatement +
+ ", not creating @Before method for it.");
+ return;
+ }
+ writeALine("@Before");
+ writeALine("public void createTable" + safeTableName(info) + "() throws Exception {");
+ indent++;
+ writeALine("TestTable tTable = TestTable.getBuilder(\"" + info.msTable.getTableName() + "\")");
+ indent++; indent++;
+ writeALine(".setDbName(\"" + info.msTable.getDbName() + "\")");
+ for (FieldSchema fs : info.msTable.getSd().getCols()) {
+ writeALine(".addCol(\"" + fs.getName() + "\", \"" + fs.getType() + "\")");
+ }
+
+ if (info.msTable.getPartitionKeys() != null && info.msTable.getPartitionKeysSize() > 0) {
+ for (FieldSchema partCol : info.msTable.getPartitionKeys()) {
+ writeALine(".addPartCol(\"" + partCol.getName() + "\", \"" + partCol.getType() + "\")");
+
+ }
+ writeALine(".setNumParts(" + info.numParts + ")");
+ }
+ writeALine(".build();");
+ indent--; indent--;
+
+ writeALine("Map colStats = new HashMap<>();");
+ for (ColumnStatisticsObj cso : info.colStats) {
+ writeALine("colStats.put(\"" + cso.getColName() + "\",");
+ indent++;
+ writeALine("new StatsDataGenerator.ColStats(\"" + cso.getColName() + "\", \"" +
+ cso.getColType() + "\", ");
+ indent++;
+ if (cso.getColType().equalsIgnoreCase("bigint")) {
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ writeALine(lcsd.getLowValue() + "L, " + lcsd.getHighValue() + "L, 0, 0, 0, 0, " +
+ lcsd.getNumDVs() + ", " + lcsd.getNumNulls() + "));");
+
+ } else if (cso.getColType().toLowerCase().startsWith("int")) {
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ writeALine(lcsd.getLowValue() + ", " + lcsd.getHighValue() + ", 0, 0, 0, 0, " +
+ lcsd.getNumDVs() + ", " + lcsd.getNumNulls() + "));");
+
+ } else if (cso.getColType().equalsIgnoreCase("smallint")) {
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ writeALine("(short)" + lcsd.getLowValue() + ", (short)" + lcsd.getHighValue() +
+ ", 0, 0, 0, 0, " + lcsd.getNumDVs() + ", " + lcsd.getNumNulls() + "));");
+
+ } else if (cso.getColType().equalsIgnoreCase("tinyint")) {
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ writeALine("(byte)" + lcsd.getLowValue() + ", (byte)" + lcsd.getHighValue() +
+ ", 0, 0, 0, 0, " + lcsd.getNumDVs() + ", " + lcsd.getNumNulls() + "));");
+ } else if (cso.getColType().equalsIgnoreCase("float")) {
+ DoubleColumnStatsData dcsd = cso.getStatsData().getDoubleStats();
+ writeALine(dcsd.getLowValue() + "f, " + dcsd.getHighValue() + "f, 0, 0, 0, 0, " +
+ dcsd.getNumDVs() + ", " + dcsd.getNumNulls() + "));");
+
+ } else if (cso.getColType().equalsIgnoreCase("double")) {
+ DoubleColumnStatsData dcsd = cso.getStatsData().getDoubleStats();
+ writeALine(dcsd.getLowValue() + ", " + dcsd.getHighValue() + ", 0, 0, 0, 0, " +
+ dcsd.getNumDVs() + ", " + dcsd.getNumNulls() + "));");
+ } else if (cso.getColType().toLowerCase().startsWith("decimal")) {
+ DecimalColumnStatsData dcsd = cso.getStatsData().getDecimalStats();
+ Decimal low = dcsd.getLowValue();
+ Decimal high = dcsd.getHighValue();
+ writeALine("new BigDecimal(new BigInteger(Base64.decodeBase64(\"" +
+ Base64.encodeBase64URLSafeString(low.getUnscaled()) + "\")), " + low.getScale() +
+ "), new BigDecimal(new BigInteger(Base64.decodeBase64(\""
+ + Base64.encodeBase64URLSafeString(high.getUnscaled()) + "\")), " + high.getScale() +
+ "), 0, 0, 0, 0, " + dcsd.getNumDVs() + ", " + dcsd.getNumNulls() + "));");
+ } else if (cso.getColType().equalsIgnoreCase("date")) {
+ DateColumnStatsData dcsd = cso.getStatsData().getDateStats();
+ writeALine("new Date(" + (dcsd.getLowValue().getDaysSinceEpoch() * 86400) + "), " +
+ "new Date(" + (dcsd.getHighValue().getDaysSinceEpoch() * 86400) + "), 0, 0, 0, 0, " +
+ dcsd.getNumDVs() + ", " + dcsd.getNumNulls() + "));");
+ } else if (cso.getColType().equalsIgnoreCase("timestamp")) {
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ writeALine("new Timestamp(" + (lcsd.getLowValue()) + "), " +
+ "new Timestamp(" + (lcsd.getHighValue()) + "), 0, 0, 0, 0, " + lcsd.getNumDVs() + ", "
+ + lcsd.getNumNulls() + "));");
+ } else if (cso.getColType().equalsIgnoreCase("string") ||
+ cso.getColType().toLowerCase().startsWith("varchar") ||
+ cso.getColType().toLowerCase().startsWith("char")) {
+ StringColumnStatsData scsd = cso.getStatsData().getStringStats();
+ writeALine(
+ "null, null, " + scsd.getAvgColLen() + ", " + scsd.getMaxColLen() + ", 0, 0, " +
+ scsd.getNumDVs() + ", " + scsd.getNumNulls() + "));");
+ } else if (cso.getColType().equalsIgnoreCase("boolean")) {
+ BooleanColumnStatsData bcsd = cso.getStatsData().getBooleanStats();
+ writeALine("0, 0, 0, 0, " + bcsd.getNumFalses() + ", " + bcsd.getNumTrues() + ", 0, " +
+ bcsd.getNumNulls() + "));");
+ } else if (cso.getColType().equalsIgnoreCase("binary")) {
+ BinaryColumnStatsData bcsd = cso.getStatsData().getBinaryStats();
+ writeALine(
+ "null, null, " + bcsd.getAvgColLen() + ", " + bcsd.getMaxColLen() + ", 0, 0, 0,"
+ + bcsd.getNumNulls() + "));");
+ } else {
+ throw new RuntimeException("Unknown column type " + cso.getColType());
+ }
+ indent--;
+ indent--;
+ }
+ writeALine("StatsDataGenerator.TableStats tableStats = new StatsDataGenerator.TableStats" +
+ "(colStats, " + info.numParts + ", \"" + info.msTable.getDbName() + "\", \"" +
+ info.msTable.getTableName() + "\", " + info.dataSize + ", " + info.rowCount + ");");
+ writeALine("StatsDataGenerator gen = new StatsDataGenerator(tableStats, " +
+ new Random().nextInt() + ");");
+ if (info.needsPopulated) {
+ writeALine("tTable.create();");
+ writeALine("tTable.populate(gen);");
+ } else {
+ writeALine("tTable.createTargetTable();");
+ writeALine("targetTables.put(\"" + info.nameFromStatement + "\", tTable);");
+ }
+ indent--;
+ writeALine("}");
+ writeALine("");
+ }
+
+ private String safeTableName(TableInfo info) {
+ return info.msTable == null ? info.nameFromStatement :
+ info.msTable.getDbName() + "_" + info.msTable.getTableName();
+ }
+
+ private void writeTest(String name, List<SQLStatement> statements) throws IOException {
+ writeALine("@Test");
+ writeALine("public void " + name + "() throws Exception {");
+ indent++;
+ SQLStatement lastStmt = null;
+ for (SQLStatement stmt : statements) {
+ if (stmt.toString().startsWith("set")) {
+ String[] setParts = stmt.toString().substring(3).split("=");
+ writeALine("set(\"" + setParts[0].trim() + "\", \"" + setParts[1].trim() + "\");");
+ } else {
+ writeALine("runQuery(");
+ stmt.writeOut();
+ writeALine(");");
+ }
+ lastStmt = stmt;
+ }
+ // Only compare based on the last statement.
+ if (lastStmt.toString().startsWith("select")) {
+ if (lastStmt.toString().contains("order by")) {
+ writeALine("compare();");
+ } else {
+ writeALine("sortAndCompare();");
+ }
+ } else if (lastStmt.toString().startsWith("insert") || lastStmt.toString().startsWith("update") ||
+ lastStmt.toString().startsWith("delete")) {
+ TableInfo targetTable = lastStmt.targetTable;
+ if (targetTable.msTable == null) {
+ // We didn't build the table before because we didn't have metastore info. But by now
+ // the table must exist, so we need to go build it.
+ writeALine("String dbName, tableOnlyName;");
+ if (targetTable.nameFromStatement.contains(".")) {
+ String[] compoundName = targetTable.nameFromStatement.split("\\.");
+ writeALine("dbName = \"" + compoundName[0] + "\";");
+ writeALine("tableOnlyName = \"" + compoundName[1] + "\";");
+ } else {
+ writeALine("dbName = \"default\";");
+ writeALine("tableOnlyName = \"" + targetTable.nameFromStatement + "\";");
+ }
+
+ writeALine("TestTable tTable = TestTable.fromHiveMetastore(dbName, tableOnlyName);");
+ } else {
+ writeALine("TestTable tTable = targetTables.get(\"" + targetTable.nameFromStatement + "\");");
+ }
+ writeALine("tableCompare(tTable);");
+ } else {
+ throw new IOException("Unclear how to do comparison for statement " + lastStmt.toString());
+ }
+ indent--;
+ writeALine("}");
+ writeALine("");
+ }
+
+ private void writeALine(String line) throws IOException {
+ for (int i = 0; i < indent; i++) writer.write(" ");
+ writer.write(line);
+ writer.newLine();
+ }
+
+ private static class TableInfo {
+ Table msTable;
+ int numParts;
+ long dataSize;
+ long rowCount;
+ List<ColumnStatisticsObj> colStats;
+ final boolean needsPopulated;
+ final String nameFromStatement;
+
+ TableInfo(boolean needsPopulated, String nameFromStatement) {
+ this.needsPopulated = needsPopulated;
+ this.nameFromStatement = nameFromStatement;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof TableInfo)) return false;
+ return nameFromStatement.equals(((TableInfo)other).nameFromStatement);
+ }
+
+ @Override
+ public int hashCode() {
+ return nameFromStatement.hashCode();
+ }
+ }
+
+ private class SQLStatement {
+ final String[] stmt;
+ TableInfo targetTable;
+ private String asOneLine = null;
+
+ SQLStatement(List<String> lines) {
+ // Trim anything after a comment indicator
+ stmt = new String[lines.size()];
+ for (int i = 0; i < lines.size(); i++) {
+ if (lines.get(i).contains("--")) {
+ if (lines.get(i).indexOf("--") != 0) {
+ stmt[i] = lines.get(i).substring(0, lines.get(i).indexOf("--")).trim();
+ } else {
+ stmt[i] = "";
+ }
+ } else {
+ stmt[i] = lines.get(i).trim();
+ }
+
+ if (stmt[i].length() == 0) continue;
+
+ stmt[i] = stmt[i].toLowerCase();
+
+ // Trim any ending ;s off
+ if (stmt[i].charAt(stmt[i].length() - 1) == ';') {
+ stmt[i] = stmt[i].substring(0, stmt[i].length() - 1);
+ }
+ // Escape any quotes
+ // Use replace (literal) rather than replaceAll, where backslashes in the replacement
+ // string are treated specially and would make this a no-op.
+ stmt[i] = stmt[i].replace("\"", "\\\"");
+ }
+ }
+
+ void writeOut() throws IOException {
+ indent += 2;
+ boolean first = true;
+ for (String line : stmt) {
+ String plus = "";
+ if (first) first = false;
+ else plus = "+ ";
+ writeALine(plus + "\"" + line + "\"");
+ }
+ indent -= 2;
+ }
+
+ @Override
+ public String toString() {
+ if (asOneLine == null) asOneLine = StringUtils.join(stmt, " ");
+ return asOneLine;
+ }
+ }
+
+ private static final String[] prologue = {
+ "package org.apache.hive.test.capybara.generated;",
+ " ",
+ "import org.apache.commons.codec.binary.Base64;",
+ "import org.apache.hadoop.hive.conf.HiveConf;",
+ "import org.apache.hive.test.capybara.IntegrationTest;",
+ "import org.apache.hive.test.capybara.infra.StatsDataGenerator;",
+ "import org.apache.hive.test.capybara.iface.TestTable;",
+ "import org.junit.Assert;",
+ "import org.junit.Before;",
+ "import org.junit.Test;",
+ "import java.math.BigDecimal;",
+ "import java.math.BigInteger;",
+ "import java.sql.Date;",
+ "import java.sql.Timestamp;",
+ "import java.util.ArrayList;",
+ "import java.util.HashMap;",
+ "import java.util.List;",
+ "import java.util.Map;",
+ " "
+ };
+
+ // Problems:
+ // q2, q6 - grabbing the wrong table name
+}
diff --git itests/capybara/src/scripts/capygen.sh itests/capybara/src/scripts/capygen.sh
new file mode 100644
index 0000000..71aed4c
--- /dev/null
+++ itests/capybara/src/scripts/capygen.sh
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=capygen
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+capygen () {
+ CLASS=org.apache.hive.test.capybara.tools.UserQueryGenerator
+ HIVE_OPTS=''
+ execHiveCmd $CLASS "$@"
+}
+
+capygen_help () {
+ echo "usage ./hive capygen [-h] -i input_file ... -o output_file"
+ echo ""
+ echo " --input (-i) Input files to read queries from. Eache file becomes a separate JUnit test."
+ echo " --output (-o) Output file to write tests to. All tests will be placed in one class"
+ echo " --help (-h) Print help message"
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/data/TestDataSet.java itests/capybara/src/test/java/org/apache/hive/test/capybara/data/TestDataSet.java
new file mode 100644
index 0000000..7b01b7b
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/data/TestDataSet.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.data;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestDataSet {
+
+ @Test
+ public void fuzzyFloat() {
+ FloatColumn one = new FloatColumn(0);
+ one.set(new Float(1.0f));
+ Assert.assertEquals(one, one);
+
+ FloatColumn two = new FloatColumn(0);
+ two.set(new Float(2.0f));
+ Assert.assertNotEquals(one, two);
+
+ FloatColumn pointOne = new FloatColumn(0);
+ pointOne.set(new Float(0.1f));
+ Assert.assertNotEquals(one, pointOne);
+
+ FloatColumn quintillion = new FloatColumn(0);
+ quintillion.set(new Float(1000000000000000000.0f));
+ Assert.assertEquals(quintillion, quintillion);
+
+ FloatColumn quintillionOne = new FloatColumn(0);
+ quintillionOne.set(new Float(1000000000000000001.0f));
+ Assert.assertEquals(quintillion, quintillionOne);
+
+ FloatColumn fiveQuintillion = new FloatColumn(0);
+ fiveQuintillion.set(new Float(5000000000000000000.0f));
+ Assert.assertNotEquals(quintillion, fiveQuintillion);
+
+ FloatColumn verySmall = new FloatColumn(0);
+ verySmall.set(new Float(0.0000000000000001f));
+ Assert.assertEquals(verySmall, verySmall);
+
+ FloatColumn justOverOne = new FloatColumn(0);
+ justOverOne.set(new Float(1.0000000000000001f));
+ Assert.assertEquals(one, justOverOne);
+ }
+
+ @Test
+ public void fuzzyDouble() {
+ DoubleColumn one = new DoubleColumn(0);
+ one.set(new Double(1.0));
+ Assert.assertEquals(one, one);
+
+ DoubleColumn two = new DoubleColumn(0);
+ two.set(new Double(2.0));
+ Assert.assertNotEquals(one, two);
+
+ DoubleColumn pointOne = new DoubleColumn(0);
+ pointOne.set(new Double(0.1));
+ Assert.assertNotEquals(one, pointOne);
+
+ DoubleColumn quintillion = new DoubleColumn(0);
+ quintillion.set(new Double(1000000000000000000.0));
+ Assert.assertEquals(quintillion, quintillion);
+
+ DoubleColumn quintillionOne = new DoubleColumn(0);
+ quintillionOne.set(new Double(1000000000000000001.0));
+ Assert.assertEquals(quintillion, quintillionOne);
+
+ DoubleColumn fiveQuintillion = new DoubleColumn(0);
+ fiveQuintillion.set(new Double(5000000000000000000.0));
+ Assert.assertNotEquals(quintillion, fiveQuintillion);
+
+ DoubleColumn verySmall = new DoubleColumn(0);
+ verySmall.set(new Double(0.0000000000000001));
+ Assert.assertEquals(verySmall, verySmall);
+
+ DoubleColumn justOverOne = new DoubleColumn(0);
+ justOverOne.set(new Double(1.0000000000000001));
+ Assert.assertEquals(one, justOverOne);
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/ExampleStreamingTest.java itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/ExampleStreamingTest.java
new file mode 100644
index 0000000..6f2d048
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/ExampleStreamingTest.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.examples;
+
+import org.apache.hive.hcatalog.streaming.DelimitedInputWriter;
+import org.apache.hive.hcatalog.streaming.HiveEndPoint;
+import org.apache.hive.hcatalog.streaming.StreamingConnection;
+import org.apache.hive.hcatalog.streaming.TransactionBatch;
+import org.apache.hive.test.capybara.IntegrationTest;
+import org.apache.hive.test.capybara.annotations.AcidOn;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ExampleStreamingTest extends IntegrationTest {
+ static final private Logger LOG = LoggerFactory.getLogger(ExampleStreamingTest.class.getName());
+
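+ // Polled by the background query thread below; volatile so the main thread's write that stops
+ // the loop is guaranteed to be visible.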
+ private volatile boolean running;
+
+ @Test
+ @AcidOn
+ public void stream() throws Exception {
+ // There's no way to validate results while streaming (since there's no guarantee that each
+ // database is at the same point in committing the results). But we can stream data in while
+ // running queries (just to make sure the queries work) and occasionally pause and run
+ // queries to verify we are getting the same results.
+ TestTable target = TestTable.getBuilder("streamtarget")
+ .addCol("a", "varchar(10)")
+ .addCol("b", "int")
+ .setBucketCols("a")
+ .setNumBuckets(2)
+ .setAcid(true)
+ .build();
+ String[] colNames = {"a", "b"};
+ target.createTargetTable();
+ String[] rows = new String[] {"abc,1", "def,2", "ghi,3", "jkl,4", "mno,5", "pqr,6", "stu,7",
+ "vwx,8", "yz,9", "alpha,10"};
+
+ HiveEndPoint endPoint = getHiveEndPoint(target, null);
+ StreamingConnection conn = endPoint.newConnection(true, getConf());
+ running = true;
+ Runnable queryRunner = new Runnable() {
+ @Override
+ public void run() {
+ while (running) {
+ try {
+ // Just run it on Hive, we're not interested in testing that the benchmark can read
+ // while writing.
+ runHive("select count(*) from streamtarget");
+ Thread.sleep(1000);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ };
+
+ Thread queryThread = new Thread(queryRunner);
+ queryThread.start();
+
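+ // Stream five batches of five transactions each; every transaction writes all ten rows. After
+ // each batch, pause and verify Hive agrees with the benchmark.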
+ for (int i = 0; i < 5; i++) {
+ TransactionBatch txnBatch = conn.fetchTransactionBatch(5,
+ new DelimitedInputWriter(colNames, ",", endPoint, getConf(), ','));
+ while (txnBatch.remainingTransactions() > 0) {
+ txnBatch.beginNextTransaction();
+ for (String row : rows) txnBatch.write(row.getBytes());
+ txnBatch.commit();
+ }
+ txnBatch.close();
+ runQuery("select count(*) from streamtarget");
+ sortAndCompare();
+ }
+ running = false;
+ conn.close();
+ queryThread.join();
+ runQuery("select count(*) from streamtarget");
+ compare();
+ //tableCompare(target);
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/ExampleTest.java itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/ExampleTest.java
new file mode 100644
index 0000000..4b5e7a9
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/ExampleTest.java
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.examples;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hive.test.capybara.IntegrationTest;
+import org.apache.hive.test.capybara.TableTool;
+import org.apache.hive.test.capybara.annotations.AcidOn;
+import org.apache.hive.test.capybara.annotations.NoParquet;
+import org.apache.hive.test.capybara.annotations.NoRcFile;
+import org.apache.hive.test.capybara.annotations.NoSpark;
+import org.apache.hive.test.capybara.annotations.NoTextFile;
+import org.junit.Test;
+
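+/**
+ * Example tests showing the basic pattern: create a standard table with TableTool, run a query
+ * with runQuery, then check Hive against the benchmark with compare or sortAndCompare.
+ */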
+@NoSpark // These tests don't make sense when Spark is the engine
+public class ExampleTest extends IntegrationTest {
+ static final private Logger LOG = LoggerFactory.getLogger(ExampleTest.class);
+
+ @Test
+ public void simple() throws Exception {
+ TableTool.createAllTypes();
+
+ runQuery("select cvarchar, cchar, cstring, cint, cbigint, csmallint, ctinyint, " +
+ "cfloat, cdecimal, cdate, ctimestamp, cboolean from alltypes");
+ sortAndCompare();
+ }
+
+ @Test
+ public void countStar() throws Exception {
+ TableTool.createAllTypes();
+
+ runQuery("select count(*) from alltypes");
+ sortAndCompare();
+ }
+
+ @Test
+ public void groupBy() throws Exception {
+ TableTool.createCapySrcPart();
+
+ runQuery("select k, count(*) as cnt from capysrcpart where value is not null group by k order" +
+ " by cnt, k");
+ compare();
+ }
+
+ @Test
+ public void simpleJoin() throws Exception {
+ TableTool.createPseudoTpch();
+
+ runQuery("select p_name, avg(l_price) from ph_lineitem join ph_part on (l_partkey = " +
+ "p_partkey) group by p_name order by p_name");
+ compare();
+ }
+
+ // TODO
+ // * Make it work with HS2
+ // * Make it work with security
+ // * Make it work with HBase metastore
+ // * Make work for multi-user
+ // * Should scale move to M on the cluster instead of K?
+ // * Add qfile translator.
+ // * Add default scale (-1) to DataGenerator.generateData so people can set pctNull without
+ // messing with the scale.
+ // * Rename DataStore.fetchData to something that reflects what it actually does (like
+ // executeStmt). Rename FetchResult to StmtResult or something.
+ // * Make it so the user can change the package for tests created by UserQueryGenerator
+ // * Split up the infra package into interface and impl; it's getting too big and confusing.
+ // Move DataGenerator from top to interface.
+
+ // TODO - needs testing
+ // * Test ability to generate data in parallel (on cluster) for large data
+ // * Test ability to compare data in tables, for ETL type queries
+
+ // FIXME
+ // * Make decimal work with default precision and scale
+ // * Make binary work with Derby
+ // * We don't properly drop old records in the testtables when we discover a wrong version of
+ // the table.
+ // * We don't do anything to assure that joined user tables generate records that will join.
+ // This is somewhat hard in that Hive statistics don't help us, but we may want to at least
+ // detect the join conditions and do something to make sure we don't get null results. In
+ // particular we could infer pk/fk relationships in star schemas.
+
+ @AcidOn // Turn acid on for this test (ie set the appropriate config values)
+ @NoParquet @NoRcFile @NoTextFile
+ @Test
+ public void updateAllNonPartitioned() throws Exception {
+ TableTool.createAllTypes();
+
+ // Run a query. Complain if it fails.
+ runQuery("drop table if exists acid_uanp");
+ runQuery("create table acid_uanp(a int, b varchar(128)) clustered by (a) into 2 buckets " +
+ "stored as orc TBLPROPERTIES ('transactional'='true')");
+ runQuery("insert into acid_uanp select cint, cast(cstring as varchar(128)) " +
+ "from alltypes where cint < 0");
+ runQuery("select a,b from acid_uanp order by a");
+ compare(); // compare the results of the previous query against the source of truth.
+ runQuery("update acid_uanp set b = 'fred'");
+ runQuery("select a,b from acid_uanp");
+ sortAndCompare();
+ }
+
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/SecondExampleTest.java itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/SecondExampleTest.java
new file mode 100644
index 0000000..11e0049
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/SecondExampleTest.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.examples;
+
+import org.apache.hive.test.capybara.IntegrationTest;
+import org.apache.hive.test.capybara.TableTool;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * More example tests.
+ */
+public class SecondExampleTest extends IntegrationTest {
+ static final private Logger LOG = LoggerFactory.getLogger(SecondExampleTest.class);
+
+ @Test
+ public void insert() throws Exception {
+ TableTool.createAllTypes();
+
+ TestTable target = TestTable.getBuilder("INSERT_EXAMPLE")
+ .addCol("cstring", "varchar(120)")
+ .addCol("cbool", "boolean")
+ .addCol("clong", "bigint")
+ .addCol("cfloat", "float")
+ .addCol("cint", "int")
+ .build();
+ target.createTargetTable();
+ runQuery("insert into INSERT_EXAMPLE select cvarchar, cboolean, cbigint, cfloat, cint " +
+ "from alltypes");
+ tableCompare(target);
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/TestExplain.java itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/TestExplain.java
new file mode 100644
index 0000000..a145858
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/examples/TestExplain.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.examples;
+
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hive.test.capybara.Explain;
+import org.apache.hive.test.capybara.IntegrationTest;
+import org.apache.hive.test.capybara.TableTool;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+public class TestExplain extends IntegrationTest {
+
+ @Test
+ public void limitPushdown() throws Exception {
+ // Set configuration values for this test
+ set("hive.explain.user", false);
+ set("hive.limit.pushdown.memory.usage", 0.3f);
+ set("hive.optimize.reducededuplication.min.reducer", 1);
+
+ // TableTool provides a set of standard tables for testing. This particular one creates the
+ // capysrc table, which has two string fields, k and value.
+ TableTool.createCapySrc();
+
+ runQuery("select k,value from capysrc order by k limit 20");
+
+ // Explain fetches an Explain object for the query, which contains the QueryPlan. It also
+ // includes tools for validating the tree.
+ Explain explain = explain();
+
+ // Expect that somewhere in the plan is a MapRedTask. Fetch that task. This will assert if it
+ // can't find the task.
+ MapRedTask mrTask = explain.expect(MapRedTask.class);
+ // Expect that somewhere in the MapRedTask there's a table scan operator. Fetch that operator.
+ // This will assert if it can't find the operator.
+ TableScanOperator scan = explain.expect(mrTask, TableScanOperator.class);
+ Assert.assertNotNull(scan);
+ compare();
+ }
+
+ @Test
+ public void noRunQuery() throws Exception {
+ // TableTool provides a set of standard tables for testing. This particular one creates the
+ // capysrc table, which has two string fields, k and value.
+ TableTool.createCapySrc();
+
+ Explain explain = explain("select k,value from capysrc order by k");
+
+ // Expect that somewhere in the plan is a MapRedTask. Fetch that task. This will assert if it
+ // can't find the task.
+ MapRedTask mrTask = explain.expect(MapRedTask.class);
+ // findAll returns every table scan operator in the MapRedTask so we can assert on the count
+ // rather than just on presence.
+ List scans = explain.findAll(mrTask, TableScanOperator.class);
+ Assert.assertEquals(1, scans.size());
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestAnsiSqlStore.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestAnsiSqlStore.java
new file mode 100644
index 0000000..4395f60
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestAnsiSqlStore.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.apache.hive.test.capybara.data.DataSet;
+import org.apache.hive.test.capybara.data.ResultCode;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Properties;
+
+public class TestAnsiSqlStore {
+
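+ // A minimal anonymous stub: these tests exercise only the SQL translation path, so every
+ // abstract method that would touch a real database returns null or does nothing.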
+ private AnsiSqlStore store = new AnsiSqlStore() {
+ @Override
+ protected String ifExists() {
+ return null;
+ }
+
+ @Override
+ public String getTableName(TestTable table) {
+ return null;
+ }
+
+ @Override
+ protected String markColumnPrimaryKey() {
+ return null;
+ }
+
+ @Override
+ public void loadData(TestTable table, DataSet rows) throws SQLException, IOException {
+
+ }
+
+ @Override
+ protected String fileColumnDelimiter() {
+ return null;
+ }
+
+ @Override
+ protected String fileNull() {
+ return null;
+ }
+
+ @Override
+ protected String fileStringQuotes() {
+ return null;
+ }
+
+ @Override
+ protected Properties connectionProperties() {
+ return null;
+ }
+
+ @Override
+ protected String connectionURL() {
+ return null;
+ }
+
+ @Override
+ public Class getDriverClass() {
+ return null;
+ }
+
+ @Override
+ protected SQLTranslator getTranslator() {
+ return new SQLTranslator() {
+ @Override
+ protected String translateDataTypes(String hiveSql) {
+ return hiveSql;
+ }
+
+ @Override
+ protected String translateAlterTableRename(String tableName, String remainder) throws
+ TranslationException {
+ return null;
+ }
+
+ @Override
+ protected char identifierQuote() {
+ return '"';
+ }
+ };
+ }
+
+ @Override
+ public void forceCreateTable(TestTable table) throws SQLException, IOException {
+
+ }
+ };
+
+ @Test
+ public void createTable() throws Exception {
+ String hiveSql = "create table if not exists acid_uanp(a int, b varchar(128)) partitioned by " +
+ "(c string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')";
+ Assert.assertEquals("create table if not exists acid_uanp (a int, b varchar(128))",
+ store.hiveSqlToAnsiSql(hiveSql));
+ }
+
+ @Test
+ public void insert() throws Exception {
+ String hiveSql = "insert into table acid_uanp partition (c = 'fred') values (1, 'boy')";
+ Assert.assertEquals("insert into acid_uanp values (1, 'boy')",
+ store.hiveSqlToAnsiSql(hiveSql));
+ Assert.assertFalse(store.failureOk);
+ }
+
+ @Test
+ public void dropTable() throws Exception {
+ String hiveSql = "drop table if exists acid_uanp";
+ Assert.assertEquals("drop table if exists acid_uanp", store.hiveSqlToAnsiSql(hiveSql));
+ }
+
+ @Test
+ public void emptySqlSucceeds() throws Exception {
+ // Make sure a Hive SQL statement like alter database which is a NOP for the benchmark succeeds
+ String hiveSQL = "alter database fred set owner user user1";
+ Assert.assertEquals("", store.hiveSqlToAnsiSql(hiveSQL));
+ Assert.assertEquals(ResultCode.SUCCESS, store.fetchData(hiveSQL).rc);
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestBoundedQueue.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestBoundedQueue.java
new file mode 100644
index 0000000..5774a7b
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestBoundedQueue.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
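+/**
+ * BoundedQueue is a FIFO with a fixed capacity: as overCapacity() and offer() show, adds past
+ * the bound evict the oldest elements rather than failing.
+ */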
+public class TestBoundedQueue {
+
+ @Test
+ public void underCapacity() {
+ BoundedQueue queue = new BoundedQueue<>(10);
+ queue.add(1);
+ queue.add(2);
+
+ Assert.assertEquals(2, queue.size());
+ Iterator iter = queue.iterator();
+ Assert.assertEquals(1, (int)iter.next());
+ Assert.assertEquals(2, (int)iter.next());
+ Assert.assertFalse(iter.hasNext());
+
+ Assert.assertEquals(1, (int)queue.poll());
+ Assert.assertEquals(2, (int)queue.peek());
+ Assert.assertEquals(2, (int)queue.poll());
+ Assert.assertTrue(queue.isEmpty());
+ }
+
+ @Test
+ public void atCapacity() {
+ BoundedQueue queue = new BoundedQueue<>(2);
+ queue.add(1);
+ queue.add(2);
+
+ Assert.assertEquals(2, queue.size());
+ Iterator iter = queue.iterator();
+ Assert.assertEquals(1, (int)iter.next());
+ Assert.assertEquals(2, (int)iter.next());
+ Assert.assertFalse(iter.hasNext());
+
+ Assert.assertEquals(1, (int)queue.poll());
+ Assert.assertEquals(2, (int)queue.poll());
+ Assert.assertTrue(queue.isEmpty());
+ }
+
+ @Test
+ public void overCapacity() {
+ BoundedQueue queue = new BoundedQueue<>(2);
+ queue.add(1);
+ queue.add(2);
+ queue.add(3);
+ queue.add(4);
+
+ Assert.assertEquals(2, queue.size());
+ Iterator iter = queue.iterator();
+ Assert.assertEquals(3, (int)iter.next());
+ Assert.assertEquals(4, (int)iter.next());
+ Assert.assertFalse(iter.hasNext());
+
+ Assert.assertEquals(3, (int)queue.poll());
+ Assert.assertEquals(4, (int)queue.poll());
+ Assert.assertTrue(queue.isEmpty());
+ }
+
+ @Test
+ public void addAll() {
+ BoundedQueue queue = new BoundedQueue<>(5);
+ queue.addAll(Arrays.asList(1, 2, 3, 4));
+ Assert.assertEquals(4, queue.size());
+ Iterator iter = queue.iterator();
+ Assert.assertEquals(1, (int)iter.next());
+ Assert.assertEquals(2, (int)iter.next());
+ Assert.assertEquals(3, (int)iter.next());
+ Assert.assertEquals(4, (int)iter.next());
+ Assert.assertFalse(iter.hasNext());
+
+ queue.addAll(Arrays.asList(5, 6, 7, 8));
+ Assert.assertEquals(5, queue.size());
+ Assert.assertEquals(4, (int)queue.poll());
+ Assert.assertEquals(5, (int)queue.poll());
+ Assert.assertEquals(6, (int)queue.poll());
+ Assert.assertEquals(7, (int)queue.poll());
+ Assert.assertEquals(8, (int)queue.poll());
+ Assert.assertTrue(queue.isEmpty());
+ }
+
+ @Test
+ public void offer() {
+ BoundedQueue queue = new BoundedQueue<>(2);
+ queue.offer(1);
+ queue.offer(2);
+
+ Assert.assertEquals(2, queue.size());
+ Iterator iter = queue.iterator();
+ Assert.assertEquals(1, (int)iter.next());
+ Assert.assertEquals(2, (int)iter.next());
+ Assert.assertFalse(iter.hasNext());
+
+ queue.offer(3);
+ queue.offer(4);
+
+ Assert.assertEquals(3, (int)queue.poll());
+ Assert.assertEquals(4, (int)queue.poll());
+ Assert.assertTrue(queue.isEmpty());
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestDerbyStore.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestDerbyStore.java
new file mode 100644
index 0000000..4d9c3cc
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestDerbyStore.java
@@ -0,0 +1,197 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.apache.hive.test.capybara.data.DataSet;
+import org.apache.hive.test.capybara.data.FetchResult;
+import org.apache.hive.test.capybara.data.ResultCode;
+import org.apache.hive.test.capybara.data.Row;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hive.test.capybara.iface.DataGenerator;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
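+/**
+ * Direct tests of the Derby-backed benchmark store: type coverage, table creation (including
+ * partitioned tables and re-creation of existing tables), data loading, and reading data back.
+ */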
+public class TestDerbyStore {
+ static final private Logger LOG = LoggerFactory.getLogger(TestDerbyStore.class.getName());
+ static private DerbyStore derby;
+
+ @BeforeClass
+ public static void setup() {
+ derby = new DerbyStore();
+ }
+
+ @Test
+ public void allTypes() throws Exception {
+ TestTable table = TestTable.getBuilder("derbyAllTypes")
+ .addCol("c1", "bigint")
+ .addCol("c2", "int")
+ .addCol("c3", "smallint")
+ .addCol("c4", "tinyint")
+ .addCol("c5", "float")
+ .addCol("c6", "double")
+ .addCol("c7", "decimal(19,2)")
+ .addCol("c8", "date")
+ .addCol("c9", "timestamp")
+ .addCol("c10", "varchar(32)")
+ .addCol("c11", "char(32)")
+ .addCol("c12", "string")
+ .addCol("c13", "boolean")
+ // Binary doesn't work on derby at the moment
+ .build();
+ derby.dropTable(table);
+ derby.createTable(table);
+
+ DataGenerator gen = new RandomDataGenerator(1);
+ derby.loadData(table, gen.generateData(table));
+ }
+
+ @Test
+ public void derby() throws Exception {
+ TestTable table = TestTable.getBuilder("foo")
+ .addCol("c1", "int")
+ .addCol("c2", "varchar(25)")
+ .build();
+
+ derby.dropTable(table);
+ derby.createTable(table);
+
+ List rows = Arrays.asList("1,fred", "2,bob");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(table);
+
+ derby.loadData(table, data);
+
+ // Re-create the table; createTable should return false since the table already exists.
+ if (derby.createTable(table)) {
+ Assert.fail();
+ }
+
+ FetchResult fetch = derby.fetchData("select c1 from foo");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ Iterator iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+ Assert.assertEquals(1, iter.next().get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+ Assert.assertEquals(2, iter.next().get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+ }
+
+ @Test
+ public void sameNameDifferentDbs() throws Exception {
+ TestTable otherTable = null;
+ boolean createdSchema = false;
+ try {
+ List cols = Arrays.asList(
+ new FieldSchema("c1", "int", ""),
+ new FieldSchema("c2", "varchar(25)", "")
+ );
+ TestTable defaultTable = TestTable.getBuilder("tind").setCols(cols).build();
+ derby.dropTable(defaultTable);
+ derby.createTable(defaultTable);
+
+ List rows = Arrays.asList("1,fred", "2,bob");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(defaultTable);
+
+ derby.loadData(defaultTable, data);
+
+ derby.fetchData("create schema testschema");
+ createdSchema = true;
+ otherTable = TestTable.getBuilder("tind").setDbName("testschema").setCols(cols).build();
+ rows = Arrays.asList("3,mary", "4,elizabeth");
+ gen = new StaticDataGenerator(rows, ",");
+ data = gen.generateData(otherTable);
+ derby.dropTable(otherTable);
+ derby.createTable(otherTable);
+ derby.loadData(otherTable, data);
+
+ FetchResult fetch = derby.fetchData("select c1 from tind");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ Iterator iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+ Assert.assertEquals(1, iter.next().get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+ Assert.assertEquals(2, iter.next().get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+
+ fetch = derby.fetchData("select c1 from testschema.tind");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+ Assert.assertEquals(3, iter.next().get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+ Assert.assertEquals(4, iter.next().get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+ } finally {
+ if (otherTable != null) derby.dropTable(otherTable);
+ if (createdSchema) derby.fetchData("drop schema testschema restrict");
+ }
+ }
+
+ @Test
+ public void createTable() throws Exception {
+ List cols = Arrays.asList(
+ new FieldSchema("c1", "int", ""),
+ new FieldSchema("c2", "varchar(25)", "")
+ );
+ derby.createTable(TestTable.getBuilder("foodle").setCols(cols).build());
+
+ // Do it twice so we can see that we handle it properly if it already exists.
+ derby.createTable(TestTable.getBuilder("foodle").setCols(cols).build());
+ }
+
+ @Test
+ public void createPartitionedTable() throws Exception {
+ TestTable table = TestTable.getBuilder("part_voo")
+ .addCol("c1", "int")
+ .addCol("c2", "varchar(25)")
+ .addPartCol("p", "int")
+ .build();
+ derby.createTable(table);
+
+ List rows = Arrays.asList("1,fred,3", "2,bob,3");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(table);
+
+ derby.loadData(table, data);
+ }
+
+ @Test
+ public void failureOk() throws Exception {
+ FetchResult fetch = derby.fetchData("drop table if exists fred");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ fetch = derby.fetchData("create table fred (a int)");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ fetch = derby.fetchData("create table if not exists fred (a int)");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestDerbyTranslator.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestDerbyTranslator.java
new file mode 100644
index 0000000..12d34c2
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestDerbyTranslator.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
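+/**
+ * Verifies that the Derby translator rewrites Hive SQL into Derby's dialect: DATABASE becomes
+ * SCHEMA; Hive-only clauses (comments, locations, dbproperties, storage, bucketing, and
+ * partitioning) are dropped; and "if exists" / "if not exists" is recorded via isFailureOk()
+ * rather than passed through.
+ */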
+public class TestDerbyTranslator {
+ private DerbyStore store;
+ private SQLTranslator translator;
+
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ @Before
+ public void getTranslator() {
+ store = new DerbyStore();
+ translator = store.getTranslator();
+ }
+
+ @Test
+ public void createDatabase() throws Exception {
+ Assert.assertEquals("create schema add_part_test_db",
+ translator.translate("CREATE DATABASE add_part_test_db"));
+ Assert.assertFalse(translator.isFailureOk());
+ Assert.assertEquals("create schema newdb",
+ translator.translate("create database newDB location \"/tmp/\""));
+ Assert.assertEquals("create schema dummydb",
+ translator.translate("create database if not exists dummydb"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate("CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database'"));
+ Assert.assertTrue(translator.isFailureOk());
+ Assert.assertEquals("create schema test_db",
+ translator.translate("CREATE DATABASE test_db COMMENT 'Hive test database'"));
+ Assert.assertEquals("create schema db2",
+ translator.translate("create database db2 with dbproperties (\n" +
+ "'mapred.jobtracker.url'='http://my.jobtracker.com:53000',\n" +
+ "'hive.warehouse.dir' = '/user/hive/warehouse',\n" +
+ "'mapred.scratch.dir' = 'hdfs://tmp.dfs.com:50029/tmp')"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate("create database test_db with dbproperties ('key1' = 'value1', 'key2' = 'value2')"));
+ Assert.assertEquals("create schema jsondb1",
+ translator.translate("CREATE DATABASE IF NOT EXISTS jsondb1 COMMENT 'Test database' " +
+ "LOCATION '${hiveconf:hive.metastore.warehouse.dir}/jsondb1' WITH DBPROPERTIES ('id' = 'jsondb1')"));
+ Assert.assertEquals("create schema some_database",
+ translator.translate("CREATE DATABASE some_database comment 'for show create db test' WITH DBPROPERTIES ('somekey'='somevalue')"));
+ Assert.assertEquals("create schema \"name with a space\"",
+ translator.translate("CREATE DATABASE `name with a space` location somewhere"));
+
+ Assert.assertEquals("create schema add_part_test_db",
+ translator.translate("CREATE SCHEMA add_part_test_db"));
+ Assert.assertEquals("create schema newdb",
+ translator.translate("create schema newDB location \"/tmp/\""));
+ Assert.assertEquals("create schema dummydb",
+ translator.translate("create schema if not exists dummydb"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate("CREATE SCHEMA IF NOT EXISTS test_db COMMENT 'Hive test database'"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate("CREATE SCHEMA test_db COMMENT 'Hive test database'"));
+ Assert.assertEquals("create schema db2",
+ translator.translate("create schema db2 with dbproperties (\n" +
+ "'mapred.jobtracker.url'='http://my.jobtracker.com:53000'\n" +
+ "'hive.warehouse.dir' = '/user/hive/warehouse'\n" +
+ "'mapred.scratch.dir' = 'hdfs://tmp.dfs.com:50029/tmp')"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate("create schema test_db with dbproperties ('key1' = 'value1', 'key2' " +
+ "= 'value2')"));
+ Assert.assertEquals("create schema jsondb1",
+ translator.translate("CREATE SCHEMA IF NOT EXISTS jsondb1 COMMENT 'Test database' " +
+ "LOCATION '${hiveconf:hive.metastore.warehouse.dir}/jsondb1' WITH DBPROPERTIES ('id' = 'jsondb1')"));
+ Assert.assertEquals("create schema some_database",
+ translator.translate("CREATE SCHEMA some_database comment 'for show create db test' WITH " +
+ "DBPROPERTIES ('somekey'='somevalue')"));
+ Assert.assertEquals("create schema \"name with a space\"",
+ translator.translate("CREATE SCHEMA `name with a space`"));
+ }
+
+ @Test
+ public void dropDatabase() throws Exception {
+ Assert.assertEquals("drop schema add_part_test_db restrict",
+ translator.translate("DROP DATABASE add_part_test_db"));
+ Assert.assertEquals("drop schema statsdb1 restrict",
+ translator.translate("drop database if exists statsdb1"));
+ Assert.assertEquals("drop schema to_drop_db1 restrict",
+ translator.translate("DROP DATABASE to_drop_db1 CASCADE"));
+ Assert.assertEquals("drop schema non_exists_db3 restrict",
+ translator.translate("DROP DATABASE IF EXISTS non_exists_db3 RESTRICT"));
+ Assert.assertEquals("drop schema to_drop_db4 restrict",
+ translator.translate("DROP DATABASE to_drop_db4 RESTRICT"));
+
+ Assert.assertEquals("drop schema add_part_test_db restrict",
+ translator.translate("DROP SCHEMA add_part_test_db"));
+ Assert.assertEquals("drop schema statsdb1 restrict",
+ translator.translate("drop schema if exists statsdb1"));
+ Assert.assertEquals("drop schema to_drop_db1 restrict",
+ translator.translate("DROP SCHEMA to_drop_db1 CASCADE"));
+ Assert.assertEquals("drop schema non_exists_db3 restrict",
+ translator.translate("DROP SCHEMA IF EXISTS non_exists_db3 RESTRICT"));
+ Assert.assertEquals("drop schema to_drop_db4 restrict",
+ translator.translate("DROP SCHEMA to_drop_db4 RESTRICT"));
+ }
+
+ @Test
+ public void createTableLike() throws Exception {
+ Assert.assertEquals("create table alter3_like as select * from alter3",
+ translator.translate("create table alter3_like like alter3"));
+ Assert.assertEquals("create table emp_orc as select * from emp_staging",
+ translator.translate("create table if not exists emp_orc like emp_staging"));
+ Assert.assertTrue(translator.isFailureOk());
+ Assert.assertEquals("create table source.srcpart as select * from default.srcpart",
+ translator.translate("create table source.srcpart like default.srcpart;"));
+ }
+
+ @Test
+ public void createTableWithCols() throws Exception {
+ Assert.assertEquals("create table acidjoin1 (name varchar(50), age int)",
+ translator.translate("create table acidjoin1(name varchar(50), age int) clustered by " +
+ "(age) into 2 buckets stored as orc TBLPROPERTIES (\"transactional\"=\"true\")"));
+ Assert.assertEquals("create table alter1 (a int, b int)",
+ translator.translate("create table alter1(a int, b int)"));
+ Assert.assertEquals("create table alter2 (a int, b int)",
+ translator.translate("create table alter2(a int, b int) partitioned by (insertdate string)"));
+ Assert.assertEquals("create table alter3_src ( col1 varchar(255) )",
+ translator.translate("create table alter3_src ( col1 string ) stored as textfile "));
+ Assert.assertEquals("create table alter3 ( col1 varchar(255) )",
+ translator.translate("create table alter3 ( col1 string ) partitioned by (pcol1 string , " +
+ "pcol2 string) stored as sequencefile"));
+ Assert.assertEquals("create table ac.alter_char_1 (key varchar(255), value varchar(255))",
+ translator.translate("create table ac.alter_char_1 (key string, value string)"));
+ Assert.assertEquals("create table tst1 (key varchar(255), value varchar(255))",
+ translator.translate("create table tst1(key string, value string) partitioned by (ds " +
+ "string) clustered by (key) into 10 buckets"));
+ Assert.assertEquals("create table over1k ( t smallint, si smallint, i int, b bigint, f float, " +
+ "d double, bo boolean, s varchar(255), ts timestamp, dec decimal(4,2), bin " +
+ "blob)",
+ translator.translate("create table over1k( t tinyint, si smallint, i int, b bigint, f " +
+ "float, d double, bo boolean, s string, ts timestamp, dec decimal(4,2), bin binary) " +
+ "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE"));
+ Assert.assertEquals("create table loc_staging (state varchar(255),locid int,zip bigint,year " +
+ "int )",
+ translator.translate("create table if not exists loc_staging (state string,locid int,zip " +
+ "bigint,year int ) row format delimited fields terminated by '|' stored as textfile"));
+ Assert.assertEquals("declare global temporary table acid_dtt (a int, b varchar(128))",
+ translator.translate("create temporary table acid_dtt(a int, b varchar(128)) clustered by" +
+ " (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')"));
+ Assert.assertEquals("create table roottable (key varchar(255))",
+ translator.translate("create external table roottable (key string) row format delimited " +
+ "fields terminated by '\\t' stored as textfile"));
+ }
+
+ @Test
+ public void dropTable() throws Exception {
+ Assert.assertEquals("drop table t", translator.translate("drop table t"));
+ Assert.assertEquals("drop table t", translator.translate("drop table if exists t"));
+ Assert.assertTrue(translator.isFailureOk());
+ Assert.assertEquals("drop table db.t", translator.translate("drop table db.t"));
+ Assert.assertEquals("drop table t", translator.translate("drop table t purge"));
+ }
+
+ @Test
+ public void alterTable() throws Exception {
+ thrown.expect(TranslationException.class);
+ thrown.expectMessage("Could not translate alter table rename, Hive SQL:");
+ translator.translate("alter table tab1 rename to tab2");
+ }
+
+ @Test
+ public void selectLimit() throws Exception {
+ Assert.assertEquals("select key from src_autho_test order by key",
+ translator.translate("select key from src_autho_test order by key limit 20"));
+ Assert.assertEquals(20, store.getLimit());
+ }
+
+ @Ignore @Test
+ public void constantCast() throws Exception {
+ Assert.assertEquals("select dateval - '1999-06-07' from interval_arithmetic_1",
+ translator.translate("select dateval - date '1999-06-07' from interval_arithmetic_1"));
+ Assert.assertEquals("select dateval - '1999-06-07' from interval_arithmetic_1",
+ translator.translate("select dateval - date '1999-6-7' from interval_arithmetic_1"));
+ Assert.assertEquals("select '1999-01-01 01:00:00' from interval_arithmetic_1",
+ translator.translate("select timestamp '1999-01-01 01:00:00' from interval_arithmetic_1"));
+ Assert.assertEquals("select '1999-01-01 01:00:00' from interval_arithmetic_1",
+ translator.translate("select timestamp '1999-1-1 01:00:00' from interval_arithmetic_1"));
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestHiveStore.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestHiveStore.java
new file mode 100644
index 0000000..67f0e5f
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestHiveStore.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hive.test.capybara.data.DataSet;
+import org.apache.hive.test.capybara.data.FetchResult;
+import org.apache.hive.test.capybara.data.ResultCode;
+import org.apache.hive.test.capybara.data.Row;
+import org.apache.hive.test.capybara.iface.ClusterManager;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
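+/**
+ * Exercises HiveStore against a mini cluster: creating tables, loading generated data, and
+ * fetching it back through fetchData().
+ */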
+public class TestHiveStore {
+
+ static final private Logger LOG = LoggerFactory.getLogger(TestHiveStore.class.getName());
+
+ private static HiveConf conf;
+ private static ClusterManager mgr;
+ private static HiveStore hive;
+
+ @BeforeClass
+ public static void setup() throws IOException {
+ conf = new HiveConf();
+ conf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+ conf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
+ TestConf.setEngine(TestConf.ENGINE_UNSPECIFIED);
+ mgr = new MiniClusterManager();
+ mgr.setConf(conf);
+ mgr.setup();
+ hive = mgr.getHive();
+ TestManager.getTestManager().setClusterManager(mgr);
+ TestManager.getTestManager().setConf(conf);
+ }
+
+ @AfterClass
+ public static void tearDown() {
+ mgr.tearDown();
+ }
+
+ @Test
+ public void hive() throws Exception {
+ // Load some data, then read it back.
+ FetchResult fetch = hive.fetchData("create table foo (c1 int, c2 varchar(32))");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ TestTable table = TestTable.fromHiveMetastore("default", "foo");
+
+ List rows = Arrays.asList("1,fred", "2,bob");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(table);
+
+ hive.loadData(table, data);
+
+ fetch = hive.fetchData("select c1 from foo");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ fetch.data.setSchema(Arrays.asList(new FieldSchema("c1", "int", "")));
+ Iterator iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+ Row row = iter.next();
+ Assert.assertEquals(1, row.get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+
+ row = iter.next();
+ Assert.assertEquals(2, row.get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+ }
+
+ @Test
+ public void sameNameDifferentDbs() throws Exception {
+ boolean createdSchema = false;
+ try {
+ List cols = Arrays.asList(
+ new FieldSchema("c1", "int", ""),
+ new FieldSchema("c2", "varchar(25)", "")
+ );
+ TestTable defaultTable = TestTable.getBuilder("tind").setCols(cols).build();
+ hive.dropTable(defaultTable);
+ Assert.assertTrue(hive.createTable(defaultTable));
+
+ List rows = Arrays.asList("1,fred", "2,bob");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(defaultTable);
+
+ hive.loadData(defaultTable, data);
+
+ hive.fetchData("create database testschema");
+ createdSchema = true;
+ TestTable otherTable = TestTable.getBuilder("tind").setDbName("testschema").setCols(cols).build();
+ rows = Arrays.asList("3,mary", "4,elizabeth");
+ gen = new StaticDataGenerator(rows, ",");
+ data = gen.generateData(otherTable);
+ hive.dropTable(otherTable);
+ Assert.assertTrue(hive.createTable(otherTable));
+ hive.loadData(otherTable, data);
+
+ FetchResult fetch = hive.fetchData("select c1 from tind");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ fetch.data.setSchema(Arrays.asList(new FieldSchema("c1", "int", "")));
+ Iterator iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+ Row row = iter.next();
+ Assert.assertEquals(1, row.get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+
+ row = iter.next();
+ Assert.assertEquals(2, row.get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+
+ fetch = hive.fetchData("select c1 from testschema.tind");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ fetch.data.setSchema(Arrays.asList(new FieldSchema("c1", "int", "")));
+ iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+ row = iter.next();
+ Assert.assertEquals(3, row.get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+
+ row = iter.next();
+ Assert.assertEquals(4, row.get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+ } finally {
+ if (createdSchema) hive.fetchData("drop database testschema cascade");
+ }
+ }
+
+ @Test
+ public void hiveWithCreateTable() throws Exception {
+ // Load some data, then read it back.
+
+ TestTable table = TestTable.getBuilder("foozle")
+ .addCol("c1", "int")
+ .addCol("c2", "varchar(25)")
+ .build();
+ hive.dropTable(table);
+ hive.createTable(table);
+
+ // Re-create the table; createTable should return false since the table already exists.
+ if (hive.createTable(table)) {
+ Assert.fail();
+ }
+
+ List rows = Arrays.asList("1,fred", "2,bob");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(table);
+
+ hive.loadData(table, data);
+
+ FetchResult fetch = hive.fetchData("select c1 from foozle");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ fetch.data.setSchema(Arrays.asList(new FieldSchema("c1", "int", "")));
+ Iterator iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+
+ Row row = iter.next();
+ Assert.assertEquals(1, row.get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+ row = iter.next();
+ Assert.assertEquals(2, row.get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+ }
+
+ @Test
+ public void createTable() throws Exception {
+ hive.setConf(conf);
+ List cols = Arrays.asList(
+ new FieldSchema("c1", "int", ""),
+ new FieldSchema("c2", "varchar(25)", "")
+ );
+ hive.createTable(TestTable.getBuilder("foodle").setCols(cols).build());
+
+ // Make sure we drop and re-create the table as necessary
+ hive.createTable(TestTable.getBuilder("foodle").setCols(cols).build());
+ }
+
+ @Test
+ public void createPartitionedTable() throws Exception {
+ hive.setConf(conf);
+ TestTable table = TestTable.getBuilder("foo_part")
+ .addCol("c1", "int")
+ .addCol("c2", "varchar(25)")
+ .addPartCol("pcol", "string")
+ .build();
+ hive.dropTable(table);
+ hive.createTable(table);
+
+ List rows = Arrays.asList("1,fred,3", "2,bob,3");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(table);
+
+ hive.loadData(table, data);
+
+ FetchResult fetch = hive.fetchData("select count(*) from foo_part");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+ fetch.data.setSchema(Arrays.asList(new FieldSchema("c0", "bigint", "")));
+ Iterator output = fetch.data.stringIterator(",", "", "\"");
+ LOG.debug("Query result: " + StringUtils.join(output, "\n"));
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestIntegrationRunner.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestIntegrationRunner.java
new file mode 100644
index 0000000..57a43e5
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestIntegrationRunner.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hive.test.capybara.IntegrationTest;
+import org.apache.hive.test.capybara.annotations.AcidOn;
+import org.apache.hive.test.capybara.annotations.NoCli;
+import org.apache.hive.test.capybara.annotations.NoJdbc;
+import org.apache.hive.test.capybara.annotations.NoOrc;
+import org.apache.hive.test.capybara.annotations.NoParquet;
+import org.apache.hive.test.capybara.annotations.NoRcFile;
+import org.apache.hive.test.capybara.annotations.NoSpark;
+import org.apache.hive.test.capybara.annotations.NoTextFile;
+import org.apache.hive.test.capybara.annotations.NoTez;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.sql.SQLException;
+
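+/**
+ * Verifies the annotation handling in the runner: each test asserts it was not executed under
+ * the access method, file format, or engine its annotation excludes, and @AcidOn asserts the
+ * ACID-related configuration was actually applied.
+ */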
+@NoSpark // Make sure no tests run when spark is set
+public class TestIntegrationRunner extends IntegrationTest {
+
+ @NoCli @Test public void testNoCli() {
+ Assert.assertNotEquals("cli", TestConf.access());
+ }
+
+ @NoJdbc @Test public void testNoJdbc() {
+ Assert.assertNotEquals("jdbc", TestConf.access());
+ }
+
+ @NoOrc @Test public void testNoOrc() {
+ Assert.assertNotEquals("orc", TestConf.fileFormat());
+ }
+
+ @NoParquet @Test public void testNoParquet() {
+ Assert.assertNotEquals("parquet", TestConf.fileFormat());
+ }
+
+ @NoRcFile @Test public void testNoRcFile() {
+ Assert.assertNotEquals("rcfile", TestConf.fileFormat());
+ }
+
+ @NoTextFile @Test public void testNoTextFile() {
+ Assert.assertNotEquals("text", TestConf.fileFormat());
+ }
+
+ @NoTez @Test public void testNoTez() {
+ Assert.assertNotEquals("tez", TestConf.engine());
+ }
+
+ @AcidOn @Test public void testAcid() throws IOException, SQLException {
+ Assert.assertTrue(getCurrentConf().getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY));
+ }
+
+ @Test public void testNoSpark() {
+ Assert.assertNotEquals("spark", TestConf.engine());
+ Assert.assertFalse(getCurrentConf().getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY));
+ }
+
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestMiniHS2HiveStore.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestMiniHS2HiveStore.java
new file mode 100644
index 0000000..a6f256e
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestMiniHS2HiveStore.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hive.test.capybara.data.DataSet;
+import org.apache.hive.test.capybara.data.FetchResult;
+import org.apache.hive.test.capybara.data.ResultCode;
+import org.apache.hive.test.capybara.data.Row;
+import org.apache.hive.test.capybara.iface.ClusterManager;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
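+/**
+ * The same basic load-and-read check as TestHiveStore, but run over JDBC against MiniHS2
+ * (TestConf.ACCESS_JDBC) rather than through the CLI path.
+ */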
+public class TestMiniHS2HiveStore {
+ static final private Logger LOG = LoggerFactory.getLogger(TestMiniHS2HiveStore.class.getName());
+
+ private static HiveConf conf;
+ private static ClusterManager mgr;
+ private static HiveStore hive;
+
+ @BeforeClass
+ public static void setup() throws IOException {
+ conf = new HiveConf();
+ conf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+ //conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
+
+ TestManager.getTestManager().setConf(conf);
+ TestConf.setEngine(TestConf.ENGINE_UNSPECIFIED);
+ TestConf.setAccess(TestConf.ACCESS_JDBC);
+ mgr = TestManager.getTestManager().getClusterManager();
+ mgr.setup();
+ mgr.setConfVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
+ hive = mgr.getHive();
+ }
+
+ @AfterClass
+ public static void tearDown() {
+ mgr.tearDown();
+ }
+
+ @Test
+ public void hive() throws Exception {
+ // Load some data, then read it back.
+ FetchResult fetch = hive.fetchData("create table foo (c1 int, c2 varchar(32))");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ TestTable table = TestTable.fromHiveMetastore("default", "foo");
+
+ List rows = Arrays.asList("1,fred", "2,bob");
+ StaticDataGenerator gen = new StaticDataGenerator(rows, ",");
+ DataSet data = gen.generateData(table);
+
+ hive.loadData(table, data);
+
+ fetch = hive.fetchData("select c1 from foo");
+ Assert.assertEquals(ResultCode.SUCCESS, fetch.rc);
+
+ fetch.data.setSchema(Arrays.asList(new FieldSchema("c1", "int", "")));
+ Iterator iter = fetch.data.iterator();
+ Assert.assertTrue(iter.hasNext());
+ Row row = iter.next();
+ Assert.assertEquals(1, row.get(0).asInt());
+ Assert.assertTrue(iter.hasNext());
+
+ row = iter.next();
+ Assert.assertEquals(2, row.get(0).asInt());
+ Assert.assertFalse(iter.hasNext());
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestNonSortingComparator.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestNonSortingComparator.java
new file mode 100644
index 0000000..813ed65
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestNonSortingComparator.java
@@ -0,0 +1,261 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hive.test.capybara.data.DataSet;
+import org.apache.hive.test.capybara.iface.DataGenerator;
+import org.apache.hive.test.capybara.iface.ResultComparator;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
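+/**
+ * Tests for {@link NonSortingComparator}, which compares a Hive result set
+ * against a benchmark result set row by row, in order. The cases below cover
+ * identical data, row-count mismatches in either direction, schema
+ * mismatches, and per-cell differences including NULL versus non-NULL.
+ */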
+public class TestNonSortingComparator {
+
+ static final private Logger LOG = LoggerFactory.getLogger(TestNonSortingComparator.class.getName());
+
+ private TestTable table;
+ private List<String> rows;
+ private List<FieldSchema> cols;
+
+ @Before
+ public void createTable() {
+ final String tableName = "alltypes";
+
+ cols = Arrays.asList(
+ new FieldSchema("col_bi", "bigint", ""),
+ new FieldSchema("col_i", "int", ""),
+ new FieldSchema("col_si", "smallint", ""),
+ new FieldSchema("col_ti", "tinyint", ""),
+ new FieldSchema("col_bin", "binary", ""),
+ new FieldSchema("col_bool", "boolean", ""),
+ new FieldSchema("col_ch", "char(8)", ""),
+ new FieldSchema("col_vc", "varchar(89)", ""),
+ new FieldSchema("col_str", "string", ""),
+ new FieldSchema("col_date", "date", ""),
+ new FieldSchema("col_dec", "decimal(10,2)", ""),
+ new FieldSchema("col_fl", "float", ""),
+ new FieldSchema("col_dbl", "double", ""),
+ new FieldSchema("col_tm", "timestamp", "")
+ );
+
+ table = TestTable.getBuilder(tableName).setCols(cols).build();
+
+ rows = new ArrayList<>();
+ rows.add("-6022141300000000000,-299792458,-1432,-7,def,false,joe,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,-1.234,-6.0221413E-23,NULL");
+ rows.add("6,2,1432,7,abc,true,NULL,mary had a little lamb,her" +
+ " fleece was white as snow,2015-08-04,371.89,1.234,6.0221413E23,2015-08-04 17:16:32");
+ }
+
+ @Test
+ public void allGood() throws SQLException, IOException {
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet hive = gen1.generateData(table);
+ DataGenerator gen2 = new StaticDataGenerator(rows, ",");
+ DataSet bench = gen2.generateData(table);
+ ResultComparator comparator = new NonSortingComparator();
+ comparator.compare(hive, bench);
+ }
+
+ @Test
+ public void hiveMoreRows() throws SQLException, IOException {
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet hive = gen1.generateData(table);
+ DataGenerator gen2 = new StaticDataGenerator(rows.subList(0, 1), ",");
+ DataSet bench = gen2.generateData(table);
+ ResultComparator comparator = new NonSortingComparator();
+ try {
+ comparator.compare(hive, bench);
+ Assert.fail();
+ } catch (AssertionError e) {
+ Assert.assertEquals("Benchmark ran out of rows at 1 but hive still has rows", e.getMessage());
+ }
+ }
+
+ @Test
+ public void benchMoreRows() throws SQLException, IOException {
+ DataGenerator gen1 = new StaticDataGenerator(rows.subList(0, 1), ",");
+ DataSet hive = gen1.generateData(table);
+ DataGenerator gen2 = new StaticDataGenerator(rows, ",");
+ DataSet bench = gen2.generateData(table);
+ ResultComparator comparator = new NonSortingComparator();
+ try {
+ comparator.compare(hive, bench);
+ Assert.fail();
+ } catch (AssertionError e) {
+ Assert.assertEquals("Hive ran out of rows at 1 but benchmark still has rows", e.getMessage());
+ }
+ }
+
+ @Test
+ public void diffNumberColumns() throws Exception {
+ TestTable diffTable = TestTable.getBuilder("difftable").addCol("col_bi", "bigint").build();
+
+ List<String> diffRows = new ArrayList<>();
+ diffRows.add("6");
+ diffRows.add("7");
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet hive = gen1.generateData(table);
+ DataGenerator gen2 = new StaticDataGenerator(diffRows, ",");
+ DataSet bench = gen2.generateData(diffTable);
+ ResultComparator comparator = new NonSortingComparator();
+ try {
+ comparator.compare(hive, bench);
+ Assert.fail();
+ } catch (AssertionError e) {
+ Assert.assertEquals("Different number of columns expected:<1> but was:<14>", e.getMessage());
+ }
+ }
+
+ @Test
+ public void incompatibleDataTypes() throws Exception {
+ TestTable diffTable = TestTable.getBuilder("difftable")
+ .addCol("col_bi", "bigint")
+ .addCol("col_i", "string")
+ .addCol("col_si", "smallint")
+ .addCol("col_ti", "tinyint")
+ .addCol("col_bin", "binary")
+ .addCol("col_bool", "boolean")
+ .addCol("col_ch", "char(8)")
+ .addCol("col_vc", "varchar(89)")
+ .addCol("col_str", "string")
+ .addCol("col_date", "date")
+ .addCol("col_dec", "decimal(10,2)")
+ .addCol("col_fl", "float")
+ .addCol("col_dbl", "double")
+ .addCol("col_tm", "timestamp")
+ .build();
+
+ List<String> diffRows = new ArrayList<>();
+ diffRows.add("6,2,1432,7,abc,true,NULL,mary had a little lamb,her" +
+ " fleece was white as snow,2015-08-04,371.89,1.234,6.0221413E23,2015-08-04 17:16:32");
+ diffRows.add("-6022141300000000000,-299792458,-1432,-7,def,false,joe,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,-1.234,-6.0221413E-23,NULL");
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet hive = gen1.generateData(table);
+ DataGenerator gen2 = new StaticDataGenerator(diffRows, ",");
+ DataSet bench = gen2.generateData(diffTable);
+ ResultComparator comparator = new NonSortingComparator();
+ try {
+ comparator.compare(hive, bench);
+ Assert.fail();
+ } catch (AssertionError e) {
+ Assert.assertEquals("Found discrepency in metadata at column 1", e.getMessage());
+ }
+ }
+
+ @Test
+ public void diffNull() throws Exception {
+ List<String> diffRows = new ArrayList<>();
+ diffRows.add("6,2,1432,7,abc,true,NULL,mary had a little lamb,her" +
+ " fleece was white as snow,2015-08-04,371.89,1.234,6.0221413E23,2015-08-04 17:16:32");
+ diffRows.add("NULL,-299792458,-1432,-7,def,false,joe,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,-1.234,-6.0221413E-23,NULL");
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet hive = gen1.generateData(table);
+ DataGenerator gen2 = new StaticDataGenerator(diffRows, ",");
+ DataSet bench = gen2.generateData(table);
+ ResultComparator comparator = new NonSortingComparator();
+ try {
+ comparator.compare(hive, bench);
+ Assert.fail();
+ } catch (AssertionError e) {
+ Assert.assertEquals("Mismatch at row 1 hive row is <-6022141300000000000,-299792458,-1432," +
+ "-7,dec,false,joe,mary had a little lamb,her fleece was white as snow,2015-08-04," +
+ "371.89,-1.234,-6.0221413E-23,NULL> bench row is ", e.getMessage());
+ }
+ }
+
+ @Test
+ public void diff() throws Exception {
+ List<String> diffRows = new ArrayList<>();
+ diffRows.add("6,2,1432,7,abc,true,NULL,mary had a little lamb,her" +
+ " fleece was white as snow,2015-08-04,371.89,1.234,6.0221413E23,2015-08-04 17:16:32");
+ diffRows.add("-6022141300000000000,-299792458,-1432,-7,def,false,joe,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,1.234,-6.0221413E-23,NULL");
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet hive = gen1.generateData(table);
+ DataGenerator gen2 = new StaticDataGenerator(diffRows, ",");
+ DataSet bench = gen2.generateData(table);
+ ResultComparator comparator = new NonSortingComparator();
+ try {
+ comparator.compare(hive, bench);
+ Assert.fail();
+ } catch (AssertionError e) {
+ Assert.assertEquals("Mismatch at row 1 hive row is <-6022141300000000000," +
+ "-299792458,-1432,-7,dec,false,joe,mary had a little lamb,her fleece was white as snow," +
+ "2015-08-04,371" +
+ ".89,-1.234,-6.0221413E-23,NULL> bench row is <-6022141300000000000,-299792458,-1432," +
+ "-7,dec,false,joe,mary had a little lamb,her fleece was white as snow,2015-08-04,371" +
+ ".89,1.234,-6.0221413E-23,NULL>", e.getMessage());
+ }
+ }
+
+ @Test
+ public void cli() throws Exception {
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet bench = gen1.generateData(table);
+
+ List<String> cliRows = new ArrayList<>();
+ cliRows.add("-6022141300000000000,-299792458,-1432,-7,def,false,joe,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,-1.234,-6" +
+ ".0221413E-23,NULL");
+ cliRows.add("6,2,1432,7,abc,true,NULL,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,1.234,6" +
+ ".0221413E23,2015-08-04 17:16:32");
+
+ DataSet hive = new StringDataSet(cols, cliRows, ",", "NULL");
+ ResultComparator comparator = new NonSortingComparator();
+ comparator.compare(hive, bench);
+ }
+
+ @Test
+ public void setSchema() throws Exception {
+ DataGenerator gen1 = new StaticDataGenerator(rows, ",");
+ DataSet bench = gen1.generateData(table);
+
+ List<String> cliRows = new ArrayList<>();
+ cliRows.add("-6022141300000000000,-299792458,-1432,-7,def,false,joe,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,-1.234,-6" +
+ ".0221413E-23,NULL");
+ cliRows.add("6,2,1432,7,abc,true,NULL,mary had a little " +
+ "lamb,her fleece was white as snow,2015-08-04,371.89,1.234,6" +
+ ".0221413E23,2015-08-04 17:16:32");
+
+ DataSet hive = new StringDataSet(cliRows, ",", "NULL");
+ hive.setSchema(cols);
+ ResultComparator comparator = new NonSortingComparator();
+ comparator.compare(hive, bench);
+ }
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestPostgresTranslator.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestPostgresTranslator.java
new file mode 100644
index 0000000..c00c711
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestPostgresTranslator.java
@@ -0,0 +1,475 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
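+/**
+ * Tests for the Hive-to-Postgres SQL translator. Statements with no Postgres
+ * equivalent translate to the empty string (a no-op on the benchmark side),
+ * while constructs that cannot be translated at all raise
+ * {@link TranslationException}.
+ */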
+public class TestPostgresTranslator {
+ private SQLTranslator translator;
+
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ @Before
+ public void getTranslator() {
+ translator = new PostgresStore().getTranslator();
+ }
+
+ @Test
+ public void unrecognized() throws Exception {
+ thrown.expect(TranslationException.class);
+ translator.translate("bla bla bla");
+ }
+
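+ // Hive databases map to Postgres schemas; COMMENT, LOCATION, and
+ // DBPROPERTIES clauses have no Postgres counterpart and are dropped, and
+ // backtick-quoted identifiers become double-quoted identifiers.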
+ @Test
+ public void createDatabase() throws Exception {
+ Assert.assertEquals("create schema add_part_test_db",
+ translator.translate("CREATE DATABASE add_part_test_db"));
+ Assert.assertFalse(translator.isFailureOk());
+ Assert.assertEquals("create schema newdb",
+ translator.translate("create database newDB location \"/tmp/\""));
+ Assert.assertEquals("create schema if not exists dummydb",
+ translator.translate("create database if not exists dummydb"));
+ Assert.assertEquals("create schema if not exists test_db",
+ translator.translate("CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database'"));
+ Assert.assertFalse(translator.isFailureOk());
+ Assert.assertEquals("create schema test_db",
+ translator.translate("CREATE DATABASE test_db COMMENT 'Hive test database'"));
+ Assert.assertEquals("create schema db2",
+ translator.translate("create database db2 with dbproperties (\n" +
+ "'mapred.jobtracker.url'='http://my.jobtracker.com:53000',\n" +
+ "'hive.warehouse.dir' = '/user/hive/warehouse',\n" +
+ "'mapred.scratch.dir' = 'hdfs://tmp.dfs.com:50029/tmp')"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate(
+ "create database test_db with dbproperties ('key1' = 'value1', 'key2' = 'value2')"));
+ Assert.assertEquals("create schema if not exists jsondb1",
+ translator.translate("CREATE DATABASE IF NOT EXISTS jsondb1 COMMENT 'Test database' " +
+ "LOCATION '${hiveconf:hive.metastore.warehouse.dir}/jsondb1' WITH DBPROPERTIES ('id' = 'jsondb1')"));
+ Assert.assertEquals("create schema some_database",
+ translator.translate("CREATE DATABASE some_database comment 'for show create db test' WITH DBPROPERTIES ('somekey'='somevalue')"));
+ Assert.assertEquals("create schema \"name with a space\"",
+ translator.translate("CREATE DATABASE `name with a space` location somewhere"));
+
+ Assert.assertEquals("create schema add_part_test_db",
+ translator.translate("CREATE SCHEMA add_part_test_db"));
+ Assert.assertEquals("create schema newdb",
+ translator.translate("create schema newDB location \"/tmp/\""));
+ Assert.assertEquals("create schema if not exists dummydb",
+ translator.translate("create schema if not exists dummydb"));
+ Assert.assertEquals("create schema if not exists test_db",
+ translator.translate("CREATE SCHEMA IF NOT EXISTS test_db COMMENT 'Hive test database'"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate("CREATE SCHEMA test_db COMMENT 'Hive test database'"));
+ Assert.assertEquals("create schema db2",
+ translator.translate("create schema db2 with dbproperties (\n" +
+ "'mapred.jobtracker.url'='http://my.jobtracker.com:53000'\n" +
+ "'hive.warehouse.dir' = '/user/hive/warehouse'\n" +
+ "'mapred.scratch.dir' = 'hdfs://tmp.dfs.com:50029/tmp')"));
+ Assert.assertEquals("create schema test_db",
+ translator.translate("create schema test_db with dbproperties ('key1' = 'value1', 'key2' " +
+ "= 'value2')"));
+ Assert.assertEquals("create schema if not exists jsondb1",
+ translator.translate("CREATE SCHEMA IF NOT EXISTS jsondb1 COMMENT 'Test database' " +
+ "LOCATION '${hiveconf:hive.metastore.warehouse.dir}/jsondb1' WITH DBPROPERTIES ('id' = 'jsondb1')"));
+ Assert.assertEquals("create schema some_database",
+ translator.translate("CREATE SCHEMA some_database comment 'for show create db test' WITH " +
+ "DBPROPERTIES ('somekey'='somevalue')"));
+ Assert.assertEquals("create schema \"name with a space\"",
+ translator.translate("CREATE SCHEMA `name with a space`"));
+ }
+
+ @Test
+ public void dropDatabase() throws Exception {
+ Assert.assertEquals("drop schema add_part_test_db",
+ translator.translate("DROP DATABASE add_part_test_db"));
+ Assert.assertEquals("drop schema if exists statsdb1",
+ translator.translate("drop database if exists statsdb1"));
+ Assert.assertEquals("drop schema to_drop_db1 cascade",
+ translator.translate("DROP DATABASE to_drop_db1 CASCADE"));
+ Assert.assertEquals("drop schema if exists non_exists_db3 restrict",
+ translator.translate("DROP DATABASE IF EXISTS non_exists_db3 RESTRICT"));
+ Assert.assertEquals("drop schema to_drop_db4 restrict",
+ translator.translate("DROP DATABASE to_drop_db4 RESTRICT"));
+
+ Assert.assertEquals("drop schema add_part_test_db",
+ translator.translate("DROP SCHEMA add_part_test_db"));
+ Assert.assertEquals("drop schema if exists statsdb1",
+ translator.translate("drop schema if exists statsdb1"));
+ Assert.assertEquals("drop schema to_drop_db1 cascade",
+ translator.translate("DROP SCHEMA to_drop_db1 CASCADE"));
+ Assert.assertEquals("drop schema if exists non_exists_db3 restrict",
+ translator.translate("DROP SCHEMA IF EXISTS non_exists_db3 RESTRICT"));
+ Assert.assertEquals("drop schema to_drop_db4 restrict",
+ translator.translate("DROP SCHEMA to_drop_db4 RESTRICT"));
+ }
+
+ @Test
+ public void createTableLike() throws Exception {
+ Assert.assertEquals("create table alter3_like like alter3",
+ translator.translate("create table alter3_like like alter3"));
+ Assert.assertEquals("create table if not exists emp_orc like emp_staging",
+ translator.translate("create table if not exists emp_orc like emp_staging"));
+ Assert.assertEquals("create table source.srcpart like default.srcpart",
+ translator.translate("create table source.srcpart like default.srcpart;"));
+ Assert.assertFalse(translator.isFailureOk());
+ }
+
+ @Test
+ public void createTableAs() throws Exception {
+ Assert.assertEquals("create table src_stat as select * from src1",
+ translator.translate("create table src_stat as select * from src1"));
+ Assert.assertEquals("create table dest_grouped_old1 as select 1+1, " +
+ "2+2 as zz, src.key, src.value, count(src.value), count(src.value)" +
+ ", count(src.value), sum(value) from src group by src.key",
+ translator.translate("create table dest_grouped_old1 as select 1+1, " +
+ "2+2 as zz, src.key, src.value, count(src.value), count(src.value)" +
+ ", count(src.value), SUM(value) from src group by src.key"));
+ }
+
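+ // Type mapping: string becomes varchar(255), tinyint widens to smallint,
+ // float becomes real, double becomes double precision, and binary becomes
+ // blob. Physical-layout clauses (PARTITIONED BY, CLUSTERED BY, STORED AS,
+ // ROW FORMAT, TBLPROPERTIES, EXTERNAL) are stripped.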
+ @Test
+ public void createTableWithCols() throws Exception {
+ Assert.assertEquals("create table acidjoin1 (name varchar(50), age int)",
+ translator.translate("create table acidjoin1(name varchar(50), age int) clustered by " +
+ "(age) into 2 buckets stored as orc TBLPROPERTIES (\"transactional\"=\"true\")"));
+ Assert.assertEquals("create table alter1 (a int, b int)",
+ translator.translate("create table alter1(a int, b int)"));
+ Assert.assertEquals("create table alter2 (a int, b int)",
+ translator.translate("create table alter2(a int, b int) partitioned by (insertdate string)"));
+ Assert.assertEquals("create table alter3_src ( col1 varchar(255) )",
+ translator.translate("create table alter3_src ( col1 string ) stored as textfile "));
+ Assert.assertEquals("create table alter3 ( col1 varchar(255) )",
+ translator.translate("create table alter3 ( col1 string ) partitioned by (pcol1 string , " +
+ "pcol2 string) stored as sequencefile"));
+ Assert.assertEquals("create table ac.alter_char_1 (key varchar(255), value varchar(255))",
+ translator.translate("create table ac.alter_char_1 (key string, value string)"));
+ Assert.assertEquals("create table tst1 (key varchar(255), value varchar(255))",
+ translator.translate("create table tst1(key string, value string) partitioned by (ds " +
+ "string) clustered by (key) into 10 buckets"));
+ Assert.assertEquals("create table over1k ( t smallint, si smallint, i int, b bigint, f real, " +
+ "d double precision, bo boolean, s varchar(255), ts timestamp, dec decimal(4,2), bin " +
+ "blob)",
+ translator.translate("create table over1k( t tinyint, si smallint, i int, b bigint, f " +
+ "float, d double, bo boolean, s string, ts timestamp, dec decimal(4,2), bin binary) " +
+ "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE"));
+ Assert.assertEquals("create table if not exists loc_staging (state varchar(255),locid int,zip" +
+ " bigint,year int )",
+ translator.translate("create table if not exists loc_staging (state string,locid int,zip " +
+ "bigint,year int ) row format delimited fields terminated by '|' stored as textfile"));
+ Assert.assertEquals("create temporary table acid_dtt (a int, b varchar(128))",
+ translator.translate("create temporary table acid_dtt(a int, b varchar(128)) clustered by" +
+ " (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')"));
+ Assert.assertEquals("create table roottable (key varchar(255))",
+ translator.translate("create external table roottable (key string) row format delimited " +
+ "fields terminated by '\\t' stored as textfile"));
+ }
+
+ @Test
+ public void dropTable() throws Exception {
+ Assert.assertEquals("drop table t", translator.translate("drop table t"));
+ Assert.assertEquals("drop table if exists t", translator.translate("drop table if exists t"));
+ Assert.assertFalse(translator.isFailureOk());
+ Assert.assertEquals("drop table db.t", translator.translate("drop table db.t"));
+ Assert.assertEquals("drop table t", translator.translate("drop table t purge"));
+ }
+
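+ // Only RENAME carries over to Postgres; file-format, bucketing, skew, and
+ // partition ALTERs translate to the empty string.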
+ @Test
+ public void alterTable() throws Exception {
+ Assert.assertEquals("alter table tab1 rename to tab2", translator.translate("alter table tab1 rename to tab2"));
+ Assert.assertEquals("", translator.translate("alter table test set fileformat orc"));
+ Assert.assertEquals("",
+ translator.translate("alter table tst1 clustered by (key) into 8 buckets"));
+ Assert.assertEquals("", translator.translate("alter table fact_daily skewed by (key, value) on (('484','val_484'),('238','val_238')) stored as DIRECTORIES"));
+ Assert.assertEquals("", translator.translate("alter table skew_test.original3 not skewed"));
+ Assert.assertEquals("", translator.translate("alter table stored_as_dirs_multiple not stored as DIRECTORIES"));
+ Assert.assertEquals("", translator.translate("alter table T1 add partition (ds = 'today')"));
+ Assert.assertEquals("", translator.translate("alter table temp add if not exists partition (p ='p1')"));
+ }
+
+ @Test
+ public void selectSimple() throws Exception {
+ Assert.assertEquals("select * from add_part_test",
+ translator.translate("select * from add_part_test"));
+ Assert.assertEquals("select key, value from dest1",
+ translator.translate("select key, value from dest1"));
+ Assert.assertEquals("select count(key) from src",
+ translator.translate("select count(key) from src"));
+ Assert.assertEquals("select count(key), sum(key) from src",
+ translator.translate("select count(key), sum(key) from src"));
+ Assert.assertEquals("select sum(sin(key)), sum(cos(value)) from src_rc_concatenate_test",
+ translator.translate("select sum(sin(key)), sum(cos(value)) from src_rc_concatenate_test"));
+ Assert.assertEquals("select cast(key as int) / cast(key as varchar(255)) from src",
+ translator.translate("select cast(key as int) / cast(key as string) from src"));
+ Assert.assertEquals("select 1", translator.translate("select 1"));
+ Assert.assertEquals("select distinct l_partkey as p_partkey from lineitem",
+ translator.translate("select distinct l_partkey as p_partkey from Lineitem"));
+ Assert.assertEquals("select all l_partkey as p_partkey from lineitem",
+ translator.translate("select all l_partkey as p_partkey from Lineitem"));
+ }
+
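+ // Interval literals are not translated yet; this case is kept around
+ // (ignored) to document the expected failure mode.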
+ @Ignore @Test
+ public void selectInterval() throws Exception {
+ thrown.expect(TranslationException.class);
+ thrown.expectMessage("Could not translate interval, Hive SQL:");
+ translator.translate(
+ "select interval '10-11' year to month, interval '10' year, interval '11' month from src limit 1");
+ }
+
+ @Ignore @Test
+ public void selectConstantCasts() throws Exception {
+ Assert.assertEquals("select dateval - date '1999-06-07' from interval_arithmetic_1",
+ translator.translate("select dateval - date '1999-06-07' from interval_arithmetic_1"));
+ Assert.assertEquals("select dateval - date '1999-06-07' from interval_arithmetic_1",
+ translator.translate("select dateval - date '1999-6-7' from interval_arithmetic_1"));
+ Assert.assertEquals("select timestamp '1999-01-01 01:00:00' from interval_arithmetic_1",
+ translator.translate("select timestamp '1999-01-01 01:00:00' from interval_arithmetic_1"));
+ Assert.assertEquals("select timestamp '1999-01-01 01:00:00' from interval_arithmetic_1",
+ translator.translate("select timestamp '1999-1-1 01:00:00' from interval_arithmetic_1"));
+ Assert.assertEquals("select 101, -101, 100, -100, 100.00 from t",
+ translator.translate("select 101Y, -101S, 100, -100L, 100.00BD from T"));
+ }
+
+ @Test
+ public void selectJoin() throws Exception {
+ Assert.assertEquals("select s.name, count(distinct registration) from studenttab10k s join " +
+ "votertab10k v on (s.name = v.name) group by s.name",
+ translator.translate("select s.name, count(distinct registration) from studenttab10k s " +
+ "join votertab10k v on (s.name = v.name) group by s.name"));
+ Assert.assertEquals("select count(*) from bucket_small a join bucket_big b on a.key = b.key",
+ translator.translate("select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key"));
+ Assert.assertEquals("select count(*) from tbl1 a left outer join tbl2 b on a.key = b.key",
+ translator.translate("select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key"));
+ }
+
+ @Test
+ public void selectFromSubquery() throws Exception {
+ Assert.assertEquals("select a, b from t", translator.translate("select a, b from default.t"));
+ Assert.assertEquals("select count(*) from (select a.key as key, a.value as val1, b.value " +
+ "as val2 from tbl1 a join tbl2 b on a.key = b.key) subq1",
+ translator.translate("select count(*) from ( select a.key as key, a.value as val1, b" +
+ ".value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1"));
+ Assert.assertEquals("select count(*) from (select key, count(*) from (select a.key " +
+ "as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key) " +
+ "subq1 group by key) subq2",
+ translator.translate("select count(*) from ( select key, count(*) from ( select a" +
+ ".key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b" +
+ ".key ) subq1 group by key ) subq2"));
+ Assert.assertEquals("select sum(subq.key) from (select a.key, a.value from " +
+ "src a where a.key > 10) subq join src tab on (subq.key = tab.key and subq.key > 20 and " +
+ "subq.value = tab.value) where tab.value < 200",
+ translator.translate("SELECT sum(subq.key) FROM (select a.key, a.value " +
+ "from src a where a.key > 10 ) subq JOIN src tab ON (subq.key = tab.key and subq.key " +
+ "> 20 and subq.value = tab.value) where tab.value < 200"));
+ Assert.assertEquals("select s1.k, count(*) from (select k from t) s1 join (select k from u) " +
+ "on s1.k = s2.k s2 group by s1.k",
+ translator.translate("select s1.k, count(*) from ( select k from t ) s1 join ( select k " +
+ "from u ) on s1.k = s2.k s2 group by s1.k"));
+ Assert.assertEquals("select src1.key, src1.cnt1, src2.cnt1 from (select key, count(*) as " +
+ "cnt1 from (select a.key as key, a.value as val1, b.value as val2 from tbl1 a join" +
+ " tbl2 b on a.key = b.key) subq1 group by key) src1 join (select key, count(*) as " +
+ "cnt1 from (select a.key as key, a.value as val1, b.value as val2 from tbl1 a join" +
+ " tbl2 b on a.key = b.key) subq2 group by key) src2 on src1.key = src2.key",
+ translator.translate("select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*)" +
+ " as cnt1 from ( select a.key as key, a.value as val1, b.value as val2 from " +
+ "tbl1 a join tbl2 b on a.key = b.key ) subq1 group by key ) src1 join ( select " +
+ "key, count(*) as cnt1 from ( select a.key as key, a.value as val1, b.value as " +
+ "val2 from tbl1 a join tbl2 b on a.key = b.key ) subq2 group by key ) src2 on src1" +
+ ".key = src2.key"));
+ }
+
+ @Test
+ public void selectWhere() throws Exception {
+ Assert.assertEquals("select * from alter5 where dt='a'",
+ translator.translate("select * from alter5 where dt='a'"));
+ Assert.assertEquals("select hr, c1, length(c1) from alter_char2 where hr = 1",
+ translator.translate("select hr, c1, length(c1) from alter_char2 where hr = 1"));
+ Assert.assertEquals("select key, value, count(*) from src_cbo b where b.key in (select key " +
+ "from src_cbo where src_cbo.key > '8') group by key, value order by key",
+ translator.translate("select key, value, count(*) from src_cbo b where b.key in ( select " +
+ "key from src_cbo where src_cbo.key > '8' ) group by key, value order by key"));
+ }
+
+ @Test
+ public void selectGroupBy() throws Exception {
+ Assert.assertEquals("select c1, count(*) from tmp1 group by c1",
+ translator.translate("select c1, count(*) from tmp1 group by c1"));
+ Assert.assertEquals("select k, count(*) from b group by k having count(*) > 100",
+ translator.translate("select k, count(*) from b group by k having count(*) > 100"));
+ Assert.assertEquals("select * from src_cbo b group by key, value having not exists (select a" +
+ ".key from src_cbo a where b.value = a.value and a.key = b.key and a.value > 'val_12')",
+ translator.translate("select * from src_cbo b group by key, value having not exists ( " +
+ "select a.key from src_cbo a where b.value = a.value and a.key = b.key and a" +
+ ".value > 'val_12' )"));
+ Assert.assertEquals("select key, value, count(*) from src_cbo b where b.key in (select key " +
+ "from src_cbo where src_cbo.key > '8') group by key, value having count(*) in (select" +
+ " count(*) from src_cbo s1 where s1.key > '9' group by s1.key) order by key",
+ translator.translate("select key, value, count(*) from src_cbo b where b.key in (select " +
+ "key from src_cbo where src_cbo.key > '8') group by key, value having count(*) in " +
+ "(select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) order by key"));
+ }
+
+ @Test
+ public void selectOrderBy() throws Exception {
+ Assert.assertEquals("select a, b from acid_vectorized order by a, b",
+ translator.translate("select a, b from acid_vectorized order by a, b"));
+ Assert.assertEquals("select c1, count(*) from tmp1 group by c1 order by c1",
+ translator.translate("select c1, count(*) from tmp1 group by c1 order by c1"));
+ }
+
+ @Test
+ public void selectLimit() throws Exception {
+ Assert.assertEquals("select key from src_autho_test order by key limit 20",
+ translator.translate("select key from src_autho_test order by key limit 20"));
+ }
+
+ @Test
+ public void selectUnion() throws Exception {
+ Assert.assertEquals("select key, value from u1 union all select key, value from u2",
+ translator.translate("select key, value from u1 union all select key, value FROM u2"));
+ Assert.assertEquals("select key, value from u1 union distinct select key, value from u2",
+ translator.translate("select key, value from u1 union distinct select key, value FROM u2"));
+ Assert.assertEquals("select key, value from u1 union all select key, value from u2 union all select key as key, value from u",
+ translator.translate("select key, value from u1 union all select key, value from u2 union all select key as key, value FROM u"));
+ Assert.assertEquals("select key from src1 union select key2 from src2 order by key",
+ translator.translate("select key from src1 union select key2 from src2 order BY key"));
+ Assert.assertEquals("select key from src1 union select key2 from src2 order by key limit 5",
+ translator.translate("select key from src1 union select key2 from src2 order BY key limit 5"));
+ }
+
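+ // Postgres has no INSERT OVERWRITE and no partitions: the TABLE keyword
+ // and partition specs are dropped, and overwrite is rewritten as a plain
+ // insert.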
+ @Test
+ public void insert() throws Exception {
+ Assert.assertEquals(
+ "insert into acidjoin1 values ('aaa', 35), ('bbb', 32), ('ccc', 32), ('ddd', 35), ('eee', 32)",
+ translator.translate(
+ "insert into table acidjoin1 values ('aaa', 35), ('bbb', 32), ('ccc', 32), ('ddd', 35), ('eee', 32)"));
+ Assert.assertEquals(
+ "insert into acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10",
+ translator.translate(
+ "insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10"));
+ Assert.assertEquals(
+ "insert into ac.alter_char_1 select key, value from src order by key limit 5",
+ translator.translate(
+ "insert overwrite table ac.alter_char_1 select key, value from src order by key limit 5"));
+ Assert.assertEquals("insert into acid values(\"foo\", \"bar\")",
+ translator.translate(
+ "insert into table acid partition(ds='2008-04-08') values(\"foo\", \"bar\")"));
+ Assert.assertEquals("insert into acid select key,value,ds from srcpart",
+ translator.translate(
+ "insert into table acid partition(ds) select key,value,ds from srcpart"));
+ Assert.assertEquals("insert into tab_part select key,value from srcbucket_mapjoin_part",
+ translator.translate(
+ "insert overwrite table tab_part partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part"));
+ }
+
+ @Test
+ public void update() throws Exception {
+ Assert.assertEquals("update t_auth_up set j = 0 where i > 0",
+ translator.translate("update t_auth_up set j = 0 where i > 0"));
+ Assert.assertEquals("update acid set value = 'bar'",
+ translator.translate("update acid set value = 'bar'"));
+ }
+
+ @Test
+ public void delete() throws Exception {
+ Assert.assertEquals("delete from acid_iud",
+ translator.translate("delete from acid_iud"));
+ Assert.assertEquals("delete from acid where key = 'foo' and ds='2008-04-08'",
+ translator.translate("delete from acid where key = 'foo' and ds='2008-04-08'"));
+ }
+
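+ // Metadata, index, role, and authorization statements do not affect query
+ // results, so they all translate to the empty string.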
+ @Test
+ public void nullTranslator() throws Exception {
+ Assert.assertEquals("", translator.translate("show tables"));
+ Assert.assertEquals("", translator.translate("describe t"));
+ Assert.assertEquals("", translator.translate("explain select * from t"));
+ Assert.assertEquals("", translator.translate("analyze table src_rc_merge_test_stat compute statistics"));
+ Assert.assertEquals("", translator.translate("grant select on table src_auth_tmp to user hive_test_user"));
+ Assert.assertEquals("", translator.translate("revoke select on table src_autho_test from user hive_test_user"));
+ Assert.assertEquals("", translator.translate("create index t1_index on table t1(a) as 'COMPACT' WITH DEFERRED REBUILD"));
+ Assert.assertEquals("", translator.translate("alter index t1_index on t1 rebuild"));
+ Assert.assertEquals("", translator.translate("drop index src_index_2 on src"));
+ Assert.assertEquals("", translator.translate("create role role1"));
+ Assert.assertEquals("", translator.translate("drop role sRc_roLE"));
+ Assert.assertEquals("", translator.translate("set role ADMIN"));
+ Assert.assertEquals("", translator.translate("alter database db_alter_onr set owner user user1"));
+ Assert.assertEquals("", translator.translate("alter schema db_alter_onr set owner user user1"));
+ }
+
+ @Test
+ public void createFunction() throws Exception {
+ thrown.expect(TranslationException.class);
+ thrown.expectMessage("Could not translate create function, Hive SQL:");
+ translator.translate(
+ "create function lookup as 'org.apache.hadoop.hive.ql.udf.UDFFileLookup' using file 'hdfs:///tmp/udf_using/sales.txt'");
+ }
+
+ @Test
+ public void createTemporaryFunction() throws Exception {
+ thrown.expect(TranslationException.class);
+ thrown.expectMessage("Could not translate create function, Hive SQL:");
+ translator.translate(
+ "create temporary function udtfCount2 as 'org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFCount2'");
+ }
+
+ @Test
+ public void reloadFunction() throws Exception {
+ thrown.expect(TranslationException.class);
+ thrown.expectMessage("Could not translate reload function, Hive SQL:");
+ translator.translate("reload function");
+ }
+
+ @Test
+ public void dropFunction() throws Exception {
+ thrown.expect(TranslationException.class);
+ thrown.expectMessage("Could not translate drop function, Hive SQL:");
+ translator.translate("drop function perm_fn");
+ }
+
+ @Test
+ public void dropTemporaryFunction() throws Exception {
+ thrown.expect(TranslationException.class);
+ thrown.expectMessage("Could not translate drop function, Hive SQL:");
+ translator.translate("drop temporary function matchpathtest");
+ }
+
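+ // Backtick-quoted identifiers become double-quoted; string literals,
+ // including escaped quotes, pass through unchanged.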
+ @Test
+ public void quoting() throws Exception {
+ Assert.assertEquals("select * from t",
+ translator.translate("select * from t"));
+ Assert.assertEquals("select 'select from where' from t",
+ translator.translate("select 'select from where' from T"));
+ Assert.assertEquals("select \"select from where\" from \"table with a space\"",
+ translator.translate("select \"select from where\" from `table with a space`"));
+ Assert.assertEquals("select 'escaped ''quote' from t",
+ translator.translate("select 'escaped ''quote' from t"));
+ Assert.assertEquals("select 'ends on quote'",
+ translator.translate("select 'ends on quote'"));
+ Assert.assertEquals("select 'ends on escaped quote'''",
+ translator.translate("select 'ends on escaped quote'''"));
+ }
+
+}
diff --git itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestRandomDataGenerator.java itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestRandomDataGenerator.java
new file mode 100644
index 0000000..2b76351
--- /dev/null
+++ itests/capybara/src/test/java/org/apache/hive/test/capybara/infra/TestRandomDataGenerator.java
@@ -0,0 +1,360 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.test.capybara.infra;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hive.test.capybara.data.Column;
+import org.apache.hive.test.capybara.data.DataSet;
+import org.apache.hive.test.capybara.data.Row;
+import org.apache.hive.test.capybara.iface.DataGenerator;
+import org.apache.hive.test.capybara.iface.TestTable;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
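+/**
+ * Tests for {@link RandomDataGenerator}. Each test builds its own table and
+ * generator; since the data is random, the checks cover type bounds, null
+ * frequencies, and partition-value counts rather than exact values.
+ */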
+public class TestRandomDataGenerator {
+ static final private Logger LOG = LoggerFactory.getLogger(TestRandomDataGenerator.class.getName());
+
+ @Test
+ public void nonPartitionedAllTypes() {
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col1", "bigint", ""));
+ cols.add(new FieldSchema("col2", "int", ""));
+ cols.add(new FieldSchema("col3", "smallint", ""));
+ cols.add(new FieldSchema("col4", "tinyint", ""));
+ cols.add(new FieldSchema("col5", "float", ""));
+ cols.add(new FieldSchema("col6", "double", ""));
+ cols.add(new FieldSchema("col7", "decimal(12,4)", ""));
+ cols.add(new FieldSchema("col8", "date", ""));
+ cols.add(new FieldSchema("col9", "timestamp", ""));
+ cols.add(new FieldSchema("col10", "varchar(32)", ""));
+ cols.add(new FieldSchema("col11", "char(9)", ""));
+ cols.add(new FieldSchema("col12", "string", ""));
+ cols.add(new FieldSchema("col13", "boolean", ""));
+ cols.add(new FieldSchema("col14", "binary", ""));
+ TestTable table = TestTable.getBuilder("t1").setCols(cols).build();
+
+ RandomDataGenerator rand = new RandomDataGenerator(1);
+
+ DataSet data = rand.generateData(table, 100);
+
+ int rowCnt = 0;
+ int[] nullsSeen = new int[cols.size()];
+ Arrays.fill(nullsSeen, 0);
+ Iterator<String> strIter = data.stringIterator(",", "NULL", "");
+ for (Row row : data) {
+ Assert.assertTrue(strIter.hasNext());
+ LOG.debug("Row is " + strIter.next());
+ rowCnt++;
+ Assert.assertEquals(cols.size(), row.size());
+
+ // Check each column to make sure we got something valid. Many of these
+ // checks are deliberately trivial; they exist so the compiler can't
+ // optimize the conversion calls away.
+ for (int i = 0; i < row.size(); i++) {
+ if (row.get(i).isNull()) {
+ nullsSeen[i]++;
+ } else {
+ switch (i) {
+ case 0: Assert.assertTrue(row.get(i).asLong() >= Long.MIN_VALUE); break;
+
+ case 1: Assert.assertTrue(row.get(i).asInt() >= Integer.MIN_VALUE); break;
+
+ case 2: Assert.assertTrue(row.get(i).asShort() >= Short.MIN_VALUE); break;
+
+ case 3: Assert.assertTrue(row.get(i).asByte() >= Byte.MIN_VALUE); break;
+
+ case 4:
+ // Float.MIN_VALUE is the smallest positive float, not the most negative
+ // value, so (asFloat() >= Float.MIN_VALUE) fails for negatives. Also,
+ // (x != Float.NaN) is always true under IEEE 754; isNaN() is the real check.
+ Assert.assertFalse(Float.isNaN(row.get(i).asFloat())); break;
+
+ case 5: Assert.assertFalse(Double.isNaN(row.get(i).asDouble())); break;
+
+ case 6:
+ BigDecimal bd = row.get(i).asBigDecimal();
+ // Precision can be as low as the scale (4), since BigDecimal.precision()
+ // reports this value's actual precision, not the column's declared one.
+ Assert.assertTrue(bd.precision() >= 4 && bd.precision() < 13);
+ Assert.assertEquals(4, bd.scale());
+ break;
+
+ case 7:
+ Assert.assertTrue(row.get(i).asDate().after(new Date(-100000000000000000L))); break;
+
+ case 8:
+ Assert.assertTrue(row.get(i).asTimestamp().after(new Timestamp(-10000000000000L)));
+ break;
+
+ case 9: Assert.assertTrue(row.get(i).asString().length() <= 32); break;
+
+ case 10: Assert.assertTrue(row.get(i).asString().length() <= 9); break;
+
+ case 11: Assert.assertTrue(row.get(i).asString().length() <= 20); break;
+
+ case 12:
+ Assert.assertTrue(row.get(i).asBoolean() || !row.get(i).asBoolean());
+ break;
+
+ case 13:
+ Assert.assertTrue(row.get(i).asBytes().length <= 100); break;
+
+ default: throw new RuntimeException("Too many columns");
+
+ }
+ }
+ }
+ }
+ Assert.assertFalse(strIter.hasNext());
+
+ // lengthInBytes is all the column sizes combined.
+ long totalSize = data.lengthInBytes();
+ Assert.assertTrue("Expected totalSize >= 102400, but was " + totalSize, totalSize >= 102400);
+ // Each column should contain roughly 1% nulls, give or take randomness.
+ for (int i = 0; i < cols.size(); i++) {
+ LOG.debug("For column " + i + " nulls seen is " + nullsSeen[i] + " rowCnt is " + rowCnt);
+ Assert.assertTrue(nullsSeen[i] > rowCnt * 0.001 && nullsSeen[i] < rowCnt * 0.019);
+ }
+ }
+
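+ // generateData also accepts per-column null probabilities; the observed
+ // null counts should land near the requested 5% and 2% rates.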
+ @Test
+ public void differentNumNulls() {
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col1", "bigint", ""));
+ cols.add(new FieldSchema("col2", "int", ""));
+ TestTable table = TestTable.getBuilder("t2").setCols(cols).build();
+
+ RandomDataGenerator rand = new RandomDataGenerator(2);
+
+ DataSet data = rand.generateData(table, 100, new double[]{0.05, 0.02});
+
+ int rowCnt = 0;
+ int[] nullsSeen = new int[cols.size()];
+ Arrays.fill(nullsSeen, 0);
+ Iterator<String> strIter = data.stringIterator(",", "NULL", "");
+ for (Row row : data) {
+ LOG.debug("Row is " + strIter.next());
+ rowCnt++;
+ Assert.assertEquals(cols.size(), row.size());
+
+ for (int i = 0; i < row.size(); i++) {
+ if (row.get(i).isNull()) {
+ nullsSeen[i]++;
+ }
+ }
+ }
+
+ for (int i = 0; i < cols.size(); i++) {
+ LOG.debug("For column " + i + " nulls seen is " + nullsSeen[i] + " rowCnt is " + rowCnt);
+ }
+ Assert.assertTrue(nullsSeen[0] > rowCnt * 0.04 && nullsSeen[0] < rowCnt * 0.06);
+ Assert.assertTrue(nullsSeen[1] > rowCnt * 0.01 && nullsSeen[1] < rowCnt * 0.03);
+ }
+
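+ // Partition columns are appended after the regular columns in each
+ // generated row; with setNumParts(5) we expect exactly five distinct
+ // partition values.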
+ @Test
+ public void partitions() {
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col1", "varchar(3)", ""));
+ cols.add(new FieldSchema("col2", "date", ""));
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("p1", "smallint", ""));
+ TestTable table = TestTable.getBuilder("t3")
+ .setCols(cols)
+ .setPartCols(partCols)
+ .setNumParts(5)
+ .build();
+
+ RandomDataGenerator rand = new RandomDataGenerator(3);
+
+ DataSet data = rand.generateData(table, 100);
+
+ Set<Column> partValsSeen = new HashSet<>();
+ Iterator<String> strIter = data.stringIterator(",", "NULL", "");
+ for (Row row : data) {
+ LOG.debug("Row is " + strIter.next());
+ Assert.assertEquals(cols.size() + partCols.size(), row.size());
+
+ partValsSeen.add(row.get(cols.size()));
+ }
+
+ Assert.assertEquals(5, partValsSeen.size());
+ }
+
+ @Test
+ public void presetPartitions() {
+ List