diff --git a/.gitignore b/.gitignore
index d0c97d1..fa9773a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ common/src/gen
*.iml
*.ipr
*.iws
+*.swp
derby.log
datanucleus.log
.arc
diff --git a/hcatalog/core/pom.xml b/hcatalog/core/pom.xml
index b5e85cd..24aecf9 100644
--- a/hcatalog/core/pom.xml
+++ b/hcatalog/core/pom.xml
@@ -71,6 +71,12 @@
<artifactId>jackson-mapper-asl</artifactId>
<version>${jackson.version}</version>
</dependency>
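+ <!-- Test-scoped; used by HCatMapReduceTest to enumerate SerDe implementations
+ on the classpath when generating storage-format test parameters. -->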
+ <dependency>
+ <groupId>org.reflections</groupId>
+ <artifactId>reflections</artifactId>
+ <version>0.9.9-RC1</version>
+ <scope>test</scope>
+ </dependency>
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java
deleted file mode 100644
index f68dbb8..0000000
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hive.hcatalog.fileformats;
-
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
-import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
-import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
-import org.apache.hive.hcatalog.mapreduce.TestHCatDynamicPartitioned;
-import org.junit.BeforeClass;
-
-public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned {
-
- @BeforeClass
- public static void generateInputData() throws Exception {
- tableName = "testOrcDynamicPartitionedTable";
- generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
- generateDataColumns();
- }
-
- @Override
- protected String inputFormat() {
- return OrcInputFormat.class.getName();
- }
-
- @Override
- protected String outputFormat() {
- return OrcOutputFormat.class.getName();
- }
-
- @Override
- protected String serdeClass() {
- return OrcSerde.class.getName();
- }
-
-}
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
index 9ddc3a6..3efbed2 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
@@ -20,12 +20,16 @@
package org.apache.hive.hcatalog.mapreduce;
import java.io.IOException;
+import java.lang.reflect.Modifier;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
-
-import junit.framework.Assert;
+import java.util.ServiceLoader;
+import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -40,9 +44,31 @@
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedColumnarSerDe;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
+import org.apache.hadoop.hive.ql.io.RCFileStorageFormatDescriptor;
+import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
+import org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat;
+import org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
+import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcSerde;
+import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
+import org.apache.hadoop.hive.serde2.ByteStreamTypedSerDe;
+import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe;
+import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.NullStructSerDe;
+import org.apache.hadoop.hive.serde2.RegexSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.TypedSerDe;
+import org.apache.hadoop.hive.serde2.avro.AvroSerDe;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
+import org.apache.hadoop.hive.serde2.thrift.ThriftByteStreamTypedSerDe;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
@@ -59,52 +85,249 @@
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.storage.AvroStorageCustomHandler;
+import org.apache.hive.hcatalog.mapreduce.storage.StorageCustomHandler;
+
+import junit.framework.Assert;
+
import org.junit.After;
+import org.junit.Assume;
import org.junit.Before;
import org.junit.BeforeClass;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.reflections.Reflections;
+
import static org.junit.Assert.assertTrue;
/**
* Test for HCatOutputFormat. Writes a partition using HCatOutputFormat and reads
- * it back using HCatInputFormat, checks the column values and counts.
+ * it back using HCatInputFormat, checks the column values and counts. This class
+ * can be extended to test different partitioning schemes.
+ *
+ * This is a parameterized test that tests HCatOutputFormat and HCatInputFormat against all
+ * storage formats in the Hive codebase. All SerDes must be either registered with Hive
+ * as a native storage format via {@link org.apache.hadoop.hive.ql.io.StorageFormatDescriptor}
+ * or enumerated in ADDITIONAL_STORAGE_FORMATS; otherwise, the test will raise a failure.
+ * Storage formats that fail the HCatalog core tests, or are untested against HCatalog, can be
+ * disabled by adding them to DISABLED_SERDES, which skips running tests against them.
*/
+@RunWith(Parameterized.class)
public abstract class HCatMapReduceTest extends HCatBaseTest {
-
private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class);
+
+ /**
+ * Table of additional storage formats for HCatMapReduceTest. These are SerDes, or combinations
+ * of a SerDe with an InputFormat and OutputFormat, that are not registered as a native Hive
+ * storage format.
+ *
+ * Each row in this table has the following fields:
+ * - formatName - A string name for the storage format. This is used to give the table created
+ * for the test a unique name.
+ * - serdeClass - The name of the SerDe class used by the storage format.
+ * - inputFormatClass - The name of the InputFormat class.
+ * - outputFormatClass - The name of the OutputFormat class.
+ * - storageCustomHandlerClass - The name of the StorageCustomHandler class. See
+ * {@link org.apache.hive.hcatalog.mapreduce.storage.StorageCustomHandler}.
+ */
+ protected static final Object[][] ADDITIONAL_STORAGE_FORMATS = new Object[][] {
+ {
+ "rcfile_columnar",
+ ColumnarSerDe.class.getName(),
+ RCFileInputFormat.class.getName(),
+ RCFileOutputFormat.class.getName(),
+ null,
+ }, {
+ "avro",
+ AvroSerDe.class.getName(),
+ AvroContainerInputFormat.class.getName(),
+ AvroContainerOutputFormat.class.getName(),
+ AvroStorageCustomHandler.class.getName(),
+ }
+ };
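+ // Illustrative sketch only (hypothetical class names, not part of the Hive codebase):
+ // a non-native format would be registered by adding a row to the table above such as
+ //   { "my_format", MySerDe.class.getName(), MyInputFormat.class.getName(),
+ //     MyOutputFormat.class.getName(), null },
+ // where a null fifth field means the format needs no StorageCustomHandler.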
+
+ /**
+ * List of SerDe classes that the HCatalog core tests will not be run against.
+ */
+ protected static final String[] DISABLED_SERDES = new String[] {
+ AvroSerDe.class.getName(),
+ ParquetHiveSerDe.class.getName(),
+ MetadataTypedColumnsetSerDe.class.getName(),
+ LazyBinarySerDe.class.getName(),
+ NullStructSerDe.class.getName(),
+ RegexSerDe.class.getName(),
+ VectorizedOrcSerde.class.getName(),
+ ThriftByteStreamTypedSerDe.class.getName(),
+ ByteStreamTypedSerDe.class.getName(),
+ VectorizedColumnarSerDe.class.getName(),
+ DelimitedJSONSerDe.class.getName(),
+ DynamicSerDe.class.getName(),
+ BinarySortableSerDe.class.getName(),
+ TypedSerDe.class.getName(),
+ };
+
protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
- protected static String tableName = "testHCatMapReduceTable";
+ protected static final String TABLE_NAME = "testHCatMapReduceTable";
private static List<HCatRecord> writeRecords = new ArrayList<HCatRecord>();
private static List<HCatRecord> readRecords = new ArrayList<HCatRecord>();
- protected abstract List<FieldSchema> getPartitionKeys();
-
- protected abstract List<HCatFieldSchema> getTableColumns();
-
private static FileSystem fs;
private String externalTableLocation = null;
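+ // Per-parameter state describing the storage format under test; the Parameterized
+ // runner is assumed to supply these values through a matching constructor.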
+ protected String tableName;
+ protected String serdeClass;
+ protected String inputFormatClass;
+ protected String outputFormatClass;
+ protected StorageCustomHandler storageCustomHandler;
- protected Boolean isTableExternal() {
- return false;
+ /**
+ * Create an array of Objects used to populate the test parameters.
+ *
+ * @param name Name of the storage format.
+ * @param serdeClass Name of the SerDe class.
+ * @param inputFormatClass Name of the InputFormat class.
+ * @param outputFormatClass Name of the OutputFormat class.
+ * @return Object array containing the arguments.
+ */
+ protected static Object[] createTestArguments(String name, String serdeClass,
+ String inputFormatClass, String outputFormatClass) {
+ return createTestArguments(name, serdeClass, inputFormatClass, outputFormatClass, null);
}
- protected boolean isTableImmutable() {
- return true;
+ /**
+ * Create an array of Objects used to populate the test parameters.
+ *
+ * @param name Name of the storage format.
+ * @param serdeClass Name of the SerDe class.
+ * @param inputFormatClass Name of the InputFormat class.
+ * @param outputFormatClass Name of the OutputFormat class.
+ * @param storageCustomHandlerClass Name of the StorageCustomHandler class.
+ * @return Object array containing the arguments.
+ */
+ protected static Object[] createTestArguments(String name, String serdeClass,
+ String inputFormatClass, String outputFormatClass, String storageCustomHandlerClass) {
+ Object[] args = {
+ name,
+ serdeClass,
+ inputFormatClass,
+ outputFormatClass,
+ storageCustomHandlerClass,
+ };
+ return args;
+ }
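+ // For example (illustrative; "descriptor" stands for a StorageFormatDescriptor instance):
+ //   createTestArguments(name, descriptor.getSerde(), descriptor.getInputFormat(),
+ //       descriptor.getOutputFormat());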
+
+ /**
+ * Generate the parameters that the test fixture will be run against; each parameter
+ * represents one storage format. First, the native Hive storage formats registered via
+ * {@link org.apache.hadoop.hive.ql.io.StorageFormatDescriptor} are added. Then, storage formats
+ * enumerated in the ADDITIONAL_STORAGE_FORMATS table are added.
+ *
+ * Finally, all classes on the classpath that implement the
+ * {@link org.apache.hadoop.hive.serde2.SerDe} interface are enumerated using reflection. Each
+ * SerDe found must have been added as a native storage format, registered in
+ * ADDITIONAL_STORAGE_FORMATS, or disabled by being registered in DISABLED_SERDES; if none of
+ * these apply, the test raises a failure.
+ *
+ * @return Parameters for the test.
+ */
+ @Parameterized.Parameters
+ public static Collection<Object[]> generateParameters() {