From 116eb5620437247e6a79b16a0db4235af7249952 Mon Sep 17 00:00:00 2001 From: "David Z. Chen" Date: Tue, 17 Jun 2014 13:01:43 -0700 Subject: [PATCH] HIVE-7286: Parameterize HCatMapReduceTest for testing against all Hive storage formats. --- .gitignore | 1 + hcatalog/core/pom.xml | 6 + .../fileformats/TestOrcDynamicPartitioned.java | 52 ---- .../hive/hcatalog/mapreduce/HCatMapReduceTest.java | 331 ++++++++++++++++----- .../mapreduce/TestHCatDynamicPartitioned.java | 7 +- .../TestHCatExternalDynamicPartitioned.java | 15 +- .../mapreduce/TestHCatExternalNonPartitioned.java | 4 + .../mapreduce/TestHCatExternalPartitioned.java | 4 + .../TestHCatMutableDynamicPartitioned.java | 4 + .../mapreduce/TestHCatMutableNonPartitioned.java | 5 +- .../mapreduce/TestHCatMutablePartitioned.java | 4 + .../hcatalog/mapreduce/TestHCatNonPartitioned.java | 10 +- .../hcatalog/mapreduce/TestHCatPartitioned.java | 9 +- .../mapreduce/storage/AvroTestStorageFormat.java | 65 ++++ .../mapreduce/storage/OrcTestStorageFormat.java | 42 +++ .../storage/ParquetTestStorageFormat.java | 42 +++ .../mapreduce/storage/RCFileTestStorageFormat.java | 42 +++ .../storage/SequenceFileTestStorageFormat.java | 42 +++ .../mapreduce/storage/TestStorageFormat.java | 33 ++ .../mapreduce/storage/TextTestStorageFormat.java | 42 +++ .../hcatalog/util/AvroTypeInfoSchemaConverter.java | 150 ++++++++++ .../util/TestAvroTypeInfoSchemaConverter.java | 74 +++++ hcatalog/core/src/test/resources/alltypes.avsc | 48 +++ 23 files changed, 884 insertions(+), 148 deletions(-) delete mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/AvroTestStorageFormat.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/OrcTestStorageFormat.java create mode 100644 
hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/ParquetTestStorageFormat.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/RCFileTestStorageFormat.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/SequenceFileTestStorageFormat.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TestStorageFormat.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TextTestStorageFormat.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/AvroTypeInfoSchemaConverter.java create mode 100644 hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/TestAvroTypeInfoSchemaConverter.java create mode 100644 hcatalog/core/src/test/resources/alltypes.avsc diff --git a/.gitignore b/.gitignore index d0c97d1..fa9773a 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ common/src/gen *.iml *.ipr *.iws +*.swp derby.log datanucleus.log .arc diff --git a/hcatalog/core/pom.xml b/hcatalog/core/pom.xml index b5e85cd..24aecf9 100644 --- a/hcatalog/core/pom.xml +++ b/hcatalog/core/pom.xml @@ -71,6 +71,12 @@ jackson-mapper-asl ${jackson.version} + + org.reflections + reflections + 0.9.9-RC1 + test + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java deleted file mode 100644 index f68dbb8..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hive.hcatalog.fileformats; - -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcSerde; -import org.apache.hive.hcatalog.mapreduce.TestHCatDynamicPartitioned; -import org.junit.BeforeClass; - -public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned { - - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testOrcDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); - } - - @Override - protected String inputFormat() { - return OrcInputFormat.class.getName(); - } - - @Override - protected String outputFormat() { - return OrcOutputFormat.class.getName(); - } - - @Override - protected String serdeClass() { - return OrcSerde.class.getName(); - } - -} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java index 9ddc3a6..632e415 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java @@ -20,12 +20,15 @@ package org.apache.hive.hcatalog.mapreduce; import java.io.IOException; +import 
java.lang.reflect.Modifier; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; - -import junit.framework.Assert; +import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -40,9 +43,25 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedColumnarSerDe; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcSerde; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.hive.serde2.ByteStreamTypedSerDe; +import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe; +import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; +import org.apache.hadoop.hive.serde2.NullStructSerDe; +import org.apache.hadoop.hive.serde2.RegexSerDe; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.TypedSerDe; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; +import org.apache.hadoop.hive.serde2.thrift.ThriftByteStreamTypedSerDe; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; @@ -59,52 +78,226 @@ 
import org.apache.hive.hcatalog.data.HCatRecord; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.storage.AvroTestStorageFormat; +import org.apache.hive.hcatalog.mapreduce.storage.OrcTestStorageFormat; +import org.apache.hive.hcatalog.mapreduce.storage.ParquetTestStorageFormat; +import org.apache.hive.hcatalog.mapreduce.storage.RCFileTestStorageFormat; +import org.apache.hive.hcatalog.mapreduce.storage.SequenceFileTestStorageFormat; +import org.apache.hive.hcatalog.mapreduce.storage.TextTestStorageFormat; +import org.apache.hive.hcatalog.mapreduce.storage.TestStorageFormat; + +import junit.framework.Assert; + import org.junit.After; +import org.junit.Assume; import org.junit.Before; import org.junit.BeforeClass; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.reflections.Reflections; + import static org.junit.Assert.assertTrue; /** * Test for HCatOutputFormat. Writes a partition using HCatOutputFormat and reads - * it back using HCatInputFormat, checks the column values and counts. + * it back using HCatInputFormat, checks the column values and counts. This class + * can be tested to test different partitioning schemes. + * + * This is a parameterized test that tests HCatOutputFormat and HCatInputFormat against all + * storage formats in the Hive codebase. All SerDes must be enumerated in the STORAGE_FORMAT + * table; otherwise, the test will raise a test failure. Storage formats that fail HCatalog + * Core tests or are untested against HCatalog can be marked as disabled to skip running + * tests against them. 
*/ +@RunWith(Parameterized.class) public abstract class HCatMapReduceTest extends HCatBaseTest { - private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class); + + /** + * Table of storage formats used as parameters for HCatMapReduceTest. + * Each row has the following fields: + * - formatName - A string name for the storage format. + * - serdeClass - The name of the SerDe class used by the storage format. + * - storageFormat - A TestStorageFormat class + * {@link org.apache.hadoop.hive.hcatalog.mapreduce.storage.TestStorageFormat} for the + * storage format. + * - enabled - Whether the HCatMapReduce tests should be run against the storage format. If this + * is set to false, then the storage format either has known test failures when + * running agains tHCatalog Core tests or has not been tested with HCatalog. + */ + protected static final Object[][] STORAGE_FORMATS = new Object[][] { + { + "rcfile", + ColumnarSerDe.class.getName(), + RCFileTestStorageFormat.class.getName(), + true, + }, { + "orc", + OrcSerde.class.getName(), + OrcTestStorageFormat.class.getName(), + true, + }, { + "avro", + AvroSerDe.class.getName(), + AvroTestStorageFormat.class.getName(), + false, + }, { + "parquet", + ParquetHiveSerDe.class.getName(), + ParquetTestStorageFormat.class.getName(), + false, + }, { + "sequencefile", + LazySimpleSerDe.class.getName(), + SequenceFileTestStorageFormat.class.getName(), + true, + }, { + "text", + LazySimpleSerDe.class.getName(), + TextTestStorageFormat.class.getName(), + false, + }, { + "metadataTypedColumnset", + MetadataTypedColumnsetSerDe.class.getName(), + null, + false, + }, { + "lazyBinary", + LazyBinarySerDe.class.getName(), + null, + false, + }, { + "lazyBinaryColumnar", + LazyBinaryColumnarSerDe.class.getName(), + null, + false, + }, { + "nullStruct", + NullStructSerDe.class.getName(), + null, + false, + }, { + "regex", + RegexSerDe.class.getName(), + null, + false, + }, { + "vectorizedOrc", + 
VectorizedOrcSerde.class.getName(), + null, + false, + }, { + "thrift", + ThriftByteStreamTypedSerDe.class.getName(), + null, + false, + }, { + "byteStreamTyped", + ByteStreamTypedSerDe.class.getName(), + null, + false, + }, { + "vectorizedColumnar", + VectorizedColumnarSerDe.class.getName(), + null, + false, + }, { + "delimitedJSON", + DelimitedJSONSerDe.class.getName(), + null, + false, + }, { + "dynamic", + DynamicSerDe.class.getName(), + null, + false, + }, { + "binarySortable", + BinarySortableSerDe.class.getName(), + null, + false, + }, { + "typed", + TypedSerDe.class.getName(), + null, + false, + } + }; + protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - protected static String tableName = "testHCatMapReduceTable"; + protected static final String TABLE_NAME = "testHCatMapReduceTable"; private static List writeRecords = new ArrayList(); private static List readRecords = new ArrayList(); - protected abstract List getPartitionKeys(); - - protected abstract List getTableColumns(); - private static FileSystem fs; private String externalTableLocation = null; + protected String tableName; - protected Boolean isTableExternal() { - return false; + protected TestStorageFormat storageFormat; + protected boolean enabled; + + /** + * Use reflection to find all classes in the classpath that implement the SerDe interface + * {@link org.apache.hadoop.hive.serde2.SerDe}. For each SerDe, check whether it has been + * enumerated in the STORAGE_FORMATS table. If it is not, then raise a test failure. + */ + @BeforeClass + public static void findSerDes() { + Set testSerDes = new HashSet(); + for (int i = 0; i < STORAGE_FORMATS.length; i++) { + testSerDes.add((String) STORAGE_FORMATS[i][1]); + } + + Reflections reflections = new Reflections("org.apache.hadoop.hive"); + Set> serDes = reflections.getSubTypesOf(SerDe.class); + for (Class serDeClass : serDes) { + // Skip if SerDe class is abstract. For example, AbstractSerDe and ColumnarSerDeBase. 
+ if (Modifier.isAbstract(serDeClass.getModifiers())) { + continue; + } + assertTrue("SerDe " + serDeClass.getName() + " has not been added to the HCatMapReduceTest " + + "parameters. Please add " + serDeClass.getName() + " to the test parameters.", + testSerDes.contains(serDeClass.getName())); + } } - protected boolean isTableImmutable() { - return true; + /** + * Returns the parameters for the test. + * + * @return Parameters for the test. + */ + @Parameterized.Parameters + public static Collection generateParameters() { + return Arrays.asList(STORAGE_FORMATS); } - protected String inputFormat() { - return RCFileInputFormat.class.getName(); + /** + * Test constructor that sets the storage format class names provided by the test parameter. + */ + public HCatMapReduceTest(String formatName, String serdeClass, String testStorageFormatClass, + boolean enabled) throws Exception { + if (testStorageFormatClass != null) { + this.storageFormat = (TestStorageFormat) Class.forName(testStorageFormatClass).newInstance(); + } + this.enabled = enabled; + this.tableName = TABLE_NAME + "_" + formatName; } - protected String outputFormat() { - return RCFileOutputFormat.class.getName(); + protected abstract List getPartitionKeys(); + + protected abstract List getTableColumns(); + + protected Boolean isTableExternal() { + return false; } - protected String serdeClass() { - return ColumnarSerDe.class.getName(); + protected boolean isTableImmutable() { + return true; } @BeforeClass @@ -143,13 +336,16 @@ public void deleteTable() throws Exception { @Before public void createTable() throws Exception { - String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + // Use JUnit's Assume to skip running this fixture against any storage formats that are not + // marked as enabled. These storage formats have known failures when run against the tests. + Assume.assumeTrue(enabled); + String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; try { client.dropTable(databaseName, tableName); } catch (Exception e) { - } //can fail with NoSuchObjectException - + // Can fail with NoSuchObjectException. + } Table tbl = new Table(); tbl.setDbName(databaseName); @@ -160,10 +356,9 @@ public void createTable() throws Exception { tbl.setTableType(TableType.MANAGED_TABLE.toString()); } StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(getTableColumns()); - tbl.setPartitionKeys(getPartitionKeys()); + tbl.setPartitionKeys(getPartitionKeys()); tbl.setSd(sd); sd.setBucketCols(new ArrayList(2)); @@ -171,12 +366,12 @@ public void createTable() throws Exception { sd.getSerdeInfo().setName(tbl.getTableName()); sd.getSerdeInfo().setParameters(new HashMap()); sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - if (isTableExternal()){ + if (isTableExternal()) { sd.getSerdeInfo().getParameters().put("EXTERNAL", "TRUE"); } - sd.getSerdeInfo().setSerializationLib(serdeClass()); - sd.setInputFormat(inputFormat()); - sd.setOutputFormat(outputFormat()); + sd.getSerdeInfo().setSerializationLib(storageFormat.getSerdeClass()); + sd.setInputFormat(storageFormat.getInputFormatClass()); + sd.setOutputFormat(storageFormat.getOutputFormatClass()); Map tableParams = new HashMap(); if (isTableExternal()) { @@ -186,72 +381,64 @@ public void createTable() throws Exception { tableParams.put(hive_metastoreConstants.IS_IMMUTABLE,"true"); } tbl.setParameters(tableParams); + storageFormat.setTableCustom(tbl); client.createTable(tbl); } - //Create test input file with specified number of rows + /* + * Create test input file with specified number of rows + */ private void createInputFile(Path path, int rowCount) throws IOException { - if (fs.exists(path)) { fs.delete(path, true); } FSDataOutputStream os = fs.create(path); - for (int i = 0; i < rowCount; i++) { os.writeChars(i + "\n"); } - os.close(); } - public static class MapCreate extends - Mapper { - - 
static int writeCount = 0; //test will be in local mode + public static class MapCreate extends Mapper { + // Test will be in local mode. + static int writeCount = 0; @Override - public void map(LongWritable key, Text value, Context context - ) throws IOException, InterruptedException { - { - try { - HCatRecord rec = writeRecords.get(writeCount); - context.write(null, rec); - writeCount++; - - } catch (Exception e) { - - e.printStackTrace(System.err); //print since otherwise exception is lost - throw new IOException(e); - } + public void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + try { + HCatRecord rec = writeRecords.get(writeCount); + context.write(null, rec); + writeCount++; + } catch (Exception e) { + // Print since otherwise exception is lost. + e.printStackTrace(System.err); + throw new IOException(e); } } } - public static class MapRead extends - Mapper { - + public static class MapRead extends Mapper { static int readCount = 0; //test will be in local mode @Override - public void map(WritableComparable key, HCatRecord value, Context context - ) throws IOException, InterruptedException { - { - try { - readRecords.add(value); - readCount++; - } catch (Exception e) { - e.printStackTrace(); //print since otherwise exception is lost - throw new IOException(e); - } + public void map(WritableComparable key, HCatRecord value, Context context) + throws IOException, InterruptedException { + try { + readRecords.add(value); + readCount++; + } catch (Exception e) { + // Print since otherwise exception is lost. 
+ e.printStackTrace(); + throw new IOException(e); } } } - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite) throws Exception { + Job runMRCreate(Map partitionValues, List partitionColumns, + List records, int writeCount, boolean assertWrite) throws Exception { return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true, null); } @@ -267,10 +454,9 @@ Job runMRCreate(Map partitionValues, * @return * @throws Exception */ - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite, boolean asSingleMapTask, - String customDynamicPathPattern) throws Exception { + Job runMRCreate(Map partitionValues, List partitionColumns, + List records, int writeCount, boolean assertWrite, boolean asSingleMapTask, + String customDynamicPathPattern) throws Exception { writeRecords = records; MapCreate.writeCount = 0; @@ -355,7 +541,6 @@ Job runMRCreate(Map partitionValues, * @throws Exception */ List runMRRead(int readCount, String filter) throws Exception { - MapRead.readCount = 0; readRecords.clear(); @@ -388,9 +573,7 @@ Job runMRCreate(Map partitionValues, return readRecords; } - protected HCatSchema getTableSchema() throws Exception { - Configuration conf = new Configuration(); Job job = new Job(conf, "hcat mapreduce read schema test"); job.setJarByClass(this.getClass()); diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java index 39e9208..ca68189 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java @@ -53,9 +53,10 @@ protected static final int NUM_RECORDS = 20; protected static final int NUM_PARTITIONS = 5; - @BeforeClass - 
public static void generateInputData() throws Exception { - tableName = "testHCatDynamicPartitionedTable"; + public TestHCatDynamicPartitioned(String formatName, String serdeClass, String testStorageFormatClass, + boolean enabled) throws Exception { + super(formatName, serdeClass, testStorageFormatClass, enabled); + tableName = "testHCatDynamicPartitionedTable_" + formatName; generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); generateDataColumns(); } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java index 0838765..4b9546b 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java @@ -24,18 +24,19 @@ public class TestHCatExternalDynamicPartitioned extends TestHCatDynamicPartitioned { + public TestHCatExternalDynamicPartitioned(String formatName, String serdeClass, + String testStorageFormatClass, boolean enabled) throws Exception { + super(formatName, serdeClass, testStorageFormatClass, enabled); + tableName = "testHCatExternalDynamicPartitionedTable_" + formatName; + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + generateDataColumns(); + } + @Override protected Boolean isTableExternal() { return true; } - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testHCatExternalDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); - } - /** * Run the external dynamic partitioning test but with single map task * @throws Exception diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalNonPartitioned.java 
index 01b2ad6..23a1ad0 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalNonPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalNonPartitioned.java @@ -20,6 +20,10 @@ package org.apache.hive.hcatalog.mapreduce; public class TestHCatExternalNonPartitioned extends TestHCatNonPartitioned { + public TestHCatExternalNonPartitioned(String formatName, String serdeName, + String testStorageFormatClass, boolean enabled) throws Exception { + super(formatName, serdeName, testStorageFormatClass, enabled); + } @Override protected Boolean isTableExternal() { diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java index e5f8d1e..227f777 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java @@ -20,6 +20,10 @@ package org.apache.hive.hcatalog.mapreduce; public class TestHCatExternalPartitioned extends TestHCatPartitioned { + public TestHCatExternalPartitioned(String formatName, String serdeClass, + String testStorageFormatClass, boolean enabled) throws Exception { + super(formatName, serdeClass, testStorageFormatClass, enabled); + } @Override protected Boolean isTableExternal() { diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableDynamicPartitioned.java index bfc6a4f..37d1213 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableDynamicPartitioned.java @@ -20,6 +20,10 @@ package 
org.apache.hive.hcatalog.mapreduce; public class TestHCatMutableDynamicPartitioned extends TestHCatDynamicPartitioned { + public TestHCatMutableDynamicPartitioned(String formatName, String serdeClass, + String testStorageFormatClass, boolean enabled) throws Exception { + super(formatName, serdeClass, testStorageFormatClass, enabled); + } @Override protected boolean isTableImmutable() { diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableNonPartitioned.java index a944023..719aabf 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableNonPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutableNonPartitioned.java @@ -20,7 +20,10 @@ package org.apache.hive.hcatalog.mapreduce; public class TestHCatMutableNonPartitioned extends TestHCatNonPartitioned { - + public TestHCatMutableNonPartitioned(String formatName, String serdeClass, + String testStorageFormatClass, boolean enabled) throws Exception { + super(formatName, serdeClass, testStorageFormatClass, enabled); + } @Override protected boolean isTableImmutable() { diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutablePartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutablePartitioned.java index 73d6e80..d7b226d 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutablePartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMutablePartitioned.java @@ -20,6 +20,10 @@ package org.apache.hive.hcatalog.mapreduce; public class TestHCatMutablePartitioned extends TestHCatPartitioned { + public TestHCatMutablePartitioned(String formatName, String serdeClass, + String testStorageFormatClass, boolean enabled) throws Exception { + super(formatName, serdeClass, 
testStorageFormatClass, enabled); + } @Override protected boolean isTableImmutable() { diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java index d639e99..73723c5 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java @@ -43,16 +43,14 @@ import static org.junit.Assert.assertNull; public class TestHCatNonPartitioned extends HCatMapReduceTest { - private static List writeRecords; static List partitionColumns; - @BeforeClass - public static void oneTimeSetUp() throws Exception { - + public TestHCatNonPartitioned(String formatName, String serdeClass, String testStorageFormatClass, + boolean enabled) throws Exception { + super(formatName, serdeClass, testStorageFormatClass, enabled); dbName = null; //test if null dbName works ("default" is used) - tableName = "testHCatNonPartitionedTable"; - + tableName = "testHCatNonPartitionedTable_" + formatName; writeRecords = new ArrayList(); for (int i = 0; i < 20; i++) { diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java index dc05f43..87241fa 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java @@ -49,10 +49,10 @@ private static List writeRecords; private static List partitionColumns; - @BeforeClass - public static void oneTimeSetUp() throws Exception { - - tableName = "testHCatPartitionedTable"; + public TestHCatPartitioned(String formatName, String serdeClass, String testStorageFormatClass, + boolean enabled) throws Exception { + super(formatName, serdeClass, 
testStorageFormatClass, enabled); + tableName = "testHCatPartitionedTable_" + formatName; writeRecords = new ArrayList(); for (int i = 0; i < 20; i++) { @@ -68,7 +68,6 @@ public static void oneTimeSetUp() throws Exception { partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); } - @Override protected List getPartitionKeys() { List fields = new ArrayList(); diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/AvroTestStorageFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/AvroTestStorageFormat.java new file mode 100644 index 0000000..93e8287 --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/AvroTestStorageFormat.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce.storage; + +import com.google.common.io.Resources; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat; +import org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; +import org.apache.hive.hcatalog.util.AvroTypeInfoSchemaConverter; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class AvroTestStorageFormat extends TestStorageFormat { + @Override + public String getInputFormatClass() { + return AvroContainerInputFormat.class.getName(); + } + + @Override + public String getOutputFormatClass() { + return AvroContainerOutputFormat.class.getName(); + } + + @Override + public String getSerdeClass() { + return AvroSerDe.class.getName(); + } + + @Override + public void setTableCustom(Table table) { + StorageDescriptor sd = table.getSd(); + List cols = sd.getCols(); + Map tableParams = new HashMap(); + tableParams.put("avro.schema.literal", AvroTypeInfoSchemaConverter.convert(cols).toString()); + table.setParameters(tableParams); + } +} + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/OrcTestStorageFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/OrcTestStorageFormat.java new file mode 100644 index 0000000..096b0c4 --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/OrcTestStorageFormat.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce.storage; + +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; + +public class OrcTestStorageFormat extends TestStorageFormat { + @Override + public String getInputFormatClass() { + return OrcInputFormat.class.getName(); + } + + @Override + public String getOutputFormatClass() { + return OrcOutputFormat.class.getName(); + } + + @Override + public String getSerdeClass() { + return OrcSerde.class.getName(); + } +} + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/ParquetTestStorageFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/ParquetTestStorageFormat.java new file mode 100644 index 0000000..6609bbe --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/ParquetTestStorageFormat.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce.storage; + +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; + +public class ParquetTestStorageFormat extends TestStorageFormat { + @Override + public String getInputFormatClass() { + return MapredParquetInputFormat.class.getName(); + } + + @Override + public String getOutputFormatClass() { + return MapredParquetOutputFormat.class.getName(); + } + + @Override + public String getSerdeClass() { + return ParquetHiveSerDe.class.getName(); + } +} + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/RCFileTestStorageFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/RCFileTestStorageFormat.java new file mode 100644 index 0000000..449978b --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/RCFileTestStorageFormat.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce.storage; + +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; + +public class RCFileTestStorageFormat extends TestStorageFormat { + @Override + public String getInputFormatClass() { + return RCFileInputFormat.class.getName(); + } + + @Override + public String getOutputFormatClass() { + return RCFileOutputFormat.class.getName(); + } + + @Override + public String getSerdeClass() { + return ColumnarSerDe.class.getName(); + } +} + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/SequenceFileTestStorageFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/SequenceFileTestStorageFormat.java new file mode 100644 index 0000000..2605cf9 --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/SequenceFileTestStorageFormat.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce.storage; + +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; + +public class SequenceFileTestStorageFormat extends TestStorageFormat { + @Override + public String getInputFormatClass() { + return SequenceFileInputFormat.class.getName(); + } + + @Override + public String getOutputFormatClass() { + return SequenceFileOutputFormat.class.getName(); + } + + @Override + public String getSerdeClass() { + return LazySimpleSerDe.class.getName(); + } +} + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TestStorageFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TestStorageFormat.java new file mode 100644 index 0000000..ea42410 --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TestStorageFormat.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce.storage; + +import org.apache.hadoop.hive.metastore.api.Table; + +public abstract class TestStorageFormat { + public abstract String getInputFormatClass(); + + public abstract String getOutputFormatClass(); + + public abstract String getSerdeClass(); + + public void setTableCustom(Table tbl) { + } +} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TextTestStorageFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TextTestStorageFormat.java new file mode 100644 index 0000000..6b1edff --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/storage/TextTestStorageFormat.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce.storage; + +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hadoop.mapred.TextOutputFormat; + +public class TextTestStorageFormat extends TestStorageFormat { + @Override + public String getInputFormatClass() { + return TextInputFormat.class.getName(); + } + + @Override + public String getOutputFormatClass() { + return TextOutputFormat.class.getName(); + } + + @Override + public String getSerdeClass() { + return LazySimpleSerDe.class.getName(); + } +} + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/AvroTypeInfoSchemaConverter.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/AvroTypeInfoSchemaConverter.java new file mode 100644 index 0000000..5f7ef31 --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/AvroTypeInfoSchemaConverter.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.util; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.avro.Schema; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +public class AvroTypeInfoSchemaConverter { + /** + * Converts a list of Hive FieldSchema to an Avro Schema. + * + * @param cols + * @return Avro schema + */ + public static Schema convert(List cols) { + List columnNames = new ArrayList(); + List columnTypes = new ArrayList(); + for (FieldSchema col : cols) { + columnNames.add(col.getName()); + columnTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(col.getType())); + } + return convert(columnTypes, columnNames); + } + + /** + * Converts a list of Hive TypeInfo and column names into an Avro Schema. 
+ * + * @param columnTypes + * @param columnNames + * @return Avro schema + */ + public static Schema convert(List columnTypes, List columnNames) { + List fields = new ArrayList(); + for (int i = 0; i < columnTypes.size(); i++) { + final Schema fieldSchema = convert(columnTypes.get(i), columnNames.get(i)); + fields.add(new Schema.Field(columnNames.get(i), fieldSchema, null, null)); + } + Schema recordSchema = Schema.createRecord("schema", null, null, false); + recordSchema.setFields(fields); + return recordSchema; + } + + private static Schema convert(TypeInfo typeInfo, String fieldName) { + final Category typeCategory = typeInfo.getCategory(); + switch (typeCategory) { + case PRIMITIVE: + return convertPrimitive(fieldName, (PrimitiveTypeInfo) typeInfo); + case LIST: + return convertList(fieldName, (ListTypeInfo) typeInfo); + case STRUCT: + return convertStruct(fieldName, (StructTypeInfo) typeInfo); + case MAP: + return convertMap(fieldName, (MapTypeInfo) typeInfo); + default: + throw new TypeNotPresentException(typeInfo.getTypeName(), null); + } + } + + private static Schema convertPrimitive(String fieldName, PrimitiveTypeInfo typeInfo) { + if (typeInfo.equals(TypeInfoFactory.intTypeInfo) + || typeInfo.equals(TypeInfoFactory.shortTypeInfo) + || typeInfo.equals(TypeInfoFactory.byteTypeInfo)) { + return Schema.create(Schema.Type.INT); + } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) { + return Schema.create(Schema.Type.LONG); + } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) { + return Schema.create(Schema.Type.FLOAT); + } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) { + return Schema.create(Schema.Type.DOUBLE); + } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) { + return Schema.create(Schema.Type.BOOLEAN); + } else if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) { + return Schema.create(Schema.Type.STRING); + } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) { + return Schema.create(Schema.Type.BYTES); 
+ } else if (typeInfo.equals(TypeInfoFactory.decimalTypeInfo)) { + throw new RuntimeException("Decimal type not supported."); + } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) { + throw new RuntimeException("Timestamp type not supported."); + } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) { + throw new RuntimeException("Date type not supported."); + } else if (typeInfo.equals(TypeInfoFactory.varcharTypeInfo)) { + throw new RuntimeException("Varchar type not supported."); + } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) { + throw new RuntimeException("Unknown type not supported."); + } else { + throw new RuntimeException("Unknown type: " + typeInfo); + } + } + + private static Schema convertList(String fieldName, ListTypeInfo typeInfo) { + final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo(); + final Schema elementType = convert(elementTypeInfo, "array_element"); + return Schema.createArray(elementType); + } + + private static Schema convertStruct(String fieldName, StructTypeInfo typeInfo) { + final List columnNames = typeInfo.getAllStructFieldNames(); + final List columnTypeInfos = typeInfo.getAllStructFieldTypeInfos(); + List fields = new ArrayList(); + for (int i = 0; i < columnNames.size(); i++) { + final String columnFieldName = columnNames.get(i); + final Schema fieldSchema = convert(columnTypeInfos.get(i), columnFieldName); + fields.add(new Schema.Field(columnFieldName, fieldSchema, null, null)); + } + final Schema recordSchema = Schema.createRecord(fieldName, null, null, false); + recordSchema.setFields(fields); + return recordSchema; + } + + private static Schema convertMap(String fieldName, MapTypeInfo typeInfo) { + final TypeInfo keyTypeInfo = typeInfo.getMapKeyTypeInfo(); + if (!keyTypeInfo.equals(TypeInfoFactory.stringTypeInfo)) { + throw new RuntimeException("Avro does not support maps with key type: " + keyTypeInfo); + } + final Schema valueType = convert(typeInfo.getMapValueTypeInfo(), "value"); + 
return Schema.createMap(valueType); + } +} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/TestAvroTypeInfoSchemaConverter.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/TestAvroTypeInfoSchemaConverter.java new file mode 100644 index 0000000..43c2e7a --- /dev/null +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/util/TestAvroTypeInfoSchemaConverter.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.util; + +import com.google.common.io.Resources; + +import java.util.Arrays; +import java.util.List; + +import org.apache.avro.Schema; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestAvroTypeInfoSchemaConverter { + @Test + public void testAllTypes() throws Exception { + String fieldNames = + "myboolean," + + "myint," + + "mylong," + + "myfloat," + + "mydouble," + + "mybytes," + + "mystring," + + "myrecord," + + "myarray," + + "mymap"; + String fieldTypes = + "boolean," + + "int," + + "bigint," + + "float," + + "double," + + "binary," + + "string," + + "struct," + + "array," + + "map"; + + List columnNames = Arrays.asList(fieldNames.split(",")); + List columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(fieldTypes); + + assertEquals(columnNames.size(), columnTypes.size()); + + Schema avroSchema = AvroTypeInfoSchemaConverter.convert(columnTypes, columnNames); + Schema expectedSchema = new Schema.Parser().parse( + Resources.getResource("alltypes.avsc").openStream()); + + assertEquals("Expected: " + expectedSchema.toString(true) + + "\nGot: " + avroSchema.toString(true), + expectedSchema.toString(), avroSchema.toString()); + } +} diff --git a/hcatalog/core/src/test/resources/alltypes.avsc b/hcatalog/core/src/test/resources/alltypes.avsc new file mode 100644 index 0000000..39be023 --- /dev/null +++ b/hcatalog/core/src/test/resources/alltypes.avsc @@ -0,0 +1,48 @@ +{ + "type" : "record", + "name" : "schema", + "fields" : [ { + "name" : "myboolean", + "type" : "boolean" + }, { + "name" : "myint", + "type" : "int" + }, { + "name" : "mylong", + "type" : "long" + }, { + "name" : "myfloat", + "type" : "float" + }, { + "name" : "mydouble", + "type" : "double" + }, { + "name" : "mybytes", + "type" : "bytes" + }, { + "name" : "mystring", + "type" : "string" + }, { + "name" : 
"myrecord", + "type" : { + "type" : "record", + "name" : "myrecord", + "fields" : [ { + "name" : "mynestedint", + "type" : "int" + } ] + } + }, { + "name" : "myarray", + "type" : { + "type" : "array", + "items" : "int" + } + }, { + "name" : "mymap", + "type" : { + "type" : "map", + "values" : "int" + } + } ] +} -- 1.8.3.4 (Apple Git-47)