diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 65ec1b9..71c8e47 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -809,7 +809,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", "SerDes retriving schema from metastore. This an internal parameter. Check with the hive dev. team"), HIVEHISTORYFILELOC("hive.querylog.location", diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java index 50e022d..eb44c0e 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java @@ -178,11 +178,8 @@ public void commitDropTable( public void preCreateTable(Table tbl) throws MetaException { boolean isExternal = MetaStoreUtils.isExternalTable(tbl); - // We'd like to move this to HiveMetaStore for any non-native table, but - // first we need to support storing NULL for location on a table - if (tbl.getSd().getLocation() != null) { - throw new MetaException("LOCATION may not be specified for HBase."); - } + // HIVE-8950 removes the assert that required the location in the table's sd to be null. This + // change lets SerDes receive the table location, which SerDes like ParquetHiveSerDe require. HTable htable = null; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 3f267ff..b889c2b 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -2326,6 +2326,12 @@ private boolean createLocationForAddedPartition( boolean result = false; if (partLocation != null) { + if (part.getSd() == null) { + // If the partition does not have a storage descriptor, get one from the table. + // Using deepCopy as different partitions of a table + // can have different storage descriptors. + part.setSd(tbl.getSd().deepCopy()); + } part.getSd().setLocation(partLocation.toString()); // Check to see if the directory already exists before calling diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index b89b07a..78bfc5c 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -384,11 +384,17 @@ static public Deserializer getDeserializer(Configuration conf, try { Deserializer deserializer = ReflectionUtil.newInstance(conf.getClassByName(lib).
asSubclass(Deserializer.class), conf); + + final Properties tableMetadata = MetaStoreUtils.getTableMetadata(table); + if (tableMetadata.getProperty("location", null) == null && + table.getSd().getLocation() != null) { + tableMetadata.setProperty("location", table.getSd().getLocation()); + } if (skipConfError) { SerDeUtils.initializeSerDeWithoutErrorCheck(deserializer, conf, - MetaStoreUtils.getTableMetadata(table), null); + tableMetadata, null); } else { - SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null); + SerDeUtils.initializeSerDe(deserializer, conf, tableMetadata, null); } return deserializer; } catch (RuntimeException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java new file mode 100644 index 0000000..05fea0f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java @@ -0,0 +1,36 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import parquet.hadoop.ParquetFileReader; +import parquet.hadoop.metadata.ParquetMetadata; +import parquet.schema.GroupType; + +import java.io.IOException; + +public class ParquetSchemaReader { + public static GroupType read(Path parquetFile) throws IOException { + + Configuration conf = new Configuration(); + ParquetMetadata metaData; + try { + metaData = ParquetFileReader.readFooter(conf, parquetFile); + } catch (IOException e) { + throw new IOException("Error reading footer from: " + parquetFile, e); + } + return metaData.getFileMetaData().getSchema(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java new file mode 100644 index 0000000..33253eb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java @@ -0,0 +1,224 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import parquet.schema.DecimalMetadata; +import parquet.schema.GroupType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Type; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +public class ParquetToHiveSchemaConverter { + + private static final String INT96_IS_TS_PROPERTY_KEY = "parquet.int96.is.timestamp"; + + private Properties properties = null; + + public ParquetToHiveSchemaConverter() {} + public ParquetToHiveSchemaConverter(Properties props) { + properties = props; + } + + private static final Log LOG = LogFactory.getLog(ParquetToHiveSchemaConverter.class); + + public StructTypeInfo convert(GroupType parquetSchema) { + return convertFields(parquetSchema.getFields()); + } + + private StructTypeInfo convertFields(List<Type> parquetFields) { + StructTypeInfo structTypeInfo = new StructTypeInfo(); + ArrayList<String> names = new ArrayList<String>(); + ArrayList<TypeInfo> types = new ArrayList<TypeInfo>(); + + for (Type parquetType : parquetFields) { + + TypeInfo type; + if (parquetType.isRepetition(Type.Repetition.REPEATED)) { + type = createHiveArray(parquetType, ""); + } else { + type = convertField(parquetType); + } + + names.add(parquetType.getName()); + types.add(type); + } + + structTypeInfo.setAllStructFieldNames(names); + structTypeInfo.setAllStructFieldTypeInfos(types); + + LOG.info("Generated Hive's StructTypeInfo from parquet schema is: " + structTypeInfo); + + return structTypeInfo; + } + + private TypeInfo convertField(final Type parquetType) { + if (parquetType.isPrimitive()) { + final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = + parquetType.asPrimitiveType().getPrimitiveTypeName(); + final OriginalType originalType = parquetType.getOriginalType(); + + if (originalType == OriginalType.DECIMAL) { + final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType() + .getDecimalMetadata(); + return TypeInfoFactory.getDecimalTypeInfo(decimalMetadata.getPrecision(), + decimalMetadata.getScale()); + } + + if (parquetPrimitiveTypeName.equals(PrimitiveType.PrimitiveTypeName.INT96)) { + if (properties == null || !properties.containsKey(INT96_IS_TS_PROPERTY_KEY)) { + throw new UnsupportedOperationException("Parquet's INT96 does not have a valid mapping" + + " to a Hive type.\nIf you want Parquet's INT96 to be mapped to Hive's timestamp," + + " then set '" + INT96_IS_TS_PROPERTY_KEY + "' in the table properties.
Otherwise, " + "provide the Hive schema explicitly in the DDL statement"); + } + } + + return parquetPrimitiveTypeName.convert( + new PrimitiveType.PrimitiveTypeNameConverter<TypeInfo, RuntimeException>() { + @Override + public TypeInfo convertBOOLEAN(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.booleanTypeInfo; + } + + @Override + public TypeInfo convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.intTypeInfo; + } + + @Override + public TypeInfo convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.longTypeInfo; + } + + @Override + public TypeInfo convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.timestampTypeInfo; + } + + @Override + public TypeInfo convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.floatTypeInfo; + } + + @Override + public TypeInfo convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.doubleTypeInfo; + } + + @Override + public TypeInfo convertFIXED_LEN_BYTE_ARRAY(PrimitiveType.PrimitiveTypeName + primitiveTypeName) { + return TypeInfoFactory.binaryTypeInfo; + } + + @Override + public TypeInfo convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + if (originalType == OriginalType.UTF8 || originalType == OriginalType.ENUM) { + return TypeInfoFactory.stringTypeInfo; + } else { + return TypeInfoFactory.binaryTypeInfo; + } + } + }); + } else { + GroupType parquetGroupType = parquetType.asGroupType(); + OriginalType originalType = parquetGroupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case LIST: + if (parquetGroupType.getFieldCount() != 1) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + Type elementType = parquetGroupType.getType(0); + if (!elementType.isRepetition(Type.Repetition.REPEATED)) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + return createHiveArray(elementType, parquetGroupType.getName()); + case MAP: + case MAP_KEY_VALUE: + if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); + if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || + !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || + mapKeyValType.getFieldCount() != 2) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + Type keyType = mapKeyValType.getType(0); + if (!keyType.isPrimitive() || + !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType + .PrimitiveTypeName.BINARY) || + !keyType.getOriginalType().equals(OriginalType.UTF8)) { + throw new UnsupportedOperationException("Map key type must be binary (UTF8): " + + keyType); + } + Type valueType = mapKeyValType.getType(1); + return createHiveMap(convertField(keyType), convertField(valueType)); + case ENUM: + case UTF8: + return TypeInfoFactory.stringTypeInfo; + default: + throw new UnsupportedOperationException("Cannot convert Parquet type " + + parquetType); + } + } else { + // if no original type then it's a record + return createHiveStruct(parquetGroupType.getFields()); + } + } + } + + private TypeInfo createHiveStruct(List<Type> parquetFields) { + List<String> names = new ArrayList<String>(); + List<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + + for (Type field: parquetFields) { +
names.add(field.getName()); + typeInfos.add(convertField(field)); + } + + return TypeInfoFactory.getStructTypeInfo(names, typeInfos); + } + + private TypeInfo createHiveMap(TypeInfo keyType, TypeInfo valueType) { + return TypeInfoFactory.getMapTypeInfo(keyType, valueType); + } + + private TypeInfo createHiveArray(Type elementType, String elementName) { + if (elementType.isPrimitive()) { + return TypeInfoFactory.getListTypeInfo(convertField(elementType)); + } else { + final GroupType groupType = elementType.asGroupType(); + final List<Type> groupFields = groupType.getFields(); + if (groupFields.size() > 1 || + (groupFields.size() == 1 && + (elementType.getName().equals("array") || + elementType.getName().equals(elementName + "_tuple")))) { + return TypeInfoFactory.getListTypeInfo(createHiveStruct(groupFields)); + } else { + return TypeInfoFactory.getListTypeInfo(convertField(groupType.getFields().get(0))); + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index 7fd5e96..5070517 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -13,12 +13,21 @@ */ package org.apache.hadoop.hive.ql.io.parquet.serde; +import java.io.IOException; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Properties; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetSchemaReader; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; @@ -47,6 +56,8 @@ @SerDeSpec(schemaProps = {serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, ParquetOutputFormat.COMPRESSION}) public class ParquetHiveSerDe extends AbstractSerDe { + private static final Log LOG = LogFactory.getLog(ParquetHiveSerDe.class); + public static final Text MAP_KEY = new Text("key"); public static final Text MAP_VALUE = new Text("value"); public static final Text MAP = new Text("map"); @@ -91,8 +102,8 @@ public ParquetHiveSerDe() { public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { final TypeInfo rowTypeInfo; - final List<String> columnNames; - final List<TypeInfo> columnTypes; + List<String> columnNames; + List<TypeInfo> columnTypes; // Get column names and sort order final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); @@ -100,21 +111,62 @@ // Get compression properties compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION); - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList<String>(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList<TypeInfo>(); + if (columnNameProperty.length() == 0 && columnTypeProperty.length() == 0) { + final String locationProperty =
tbl.getProperty("location", null); + Path parquetFile = locationProperty != null ? getParquetFile(conf, + new Path(locationProperty)) : null; + + if (parquetFile == null) { + /** + * Attempt to determine hive schema failed, but can not throw + * an exception, as Hive calls init on the serde during + * any call, including calls to update the serde properties, meaning + * if the serde is in a bad state, there is no way to update that state. + */ + LOG.error("Failed to create hive schema for the parquet backed table.\n" + + "Either provide schema for table,\n" + + "OR make sure that external table's path has at least one parquet file with required " + + "metadata"); + columnNames = new ArrayList(); + columnTypes = new ArrayList(); + } else { + StructTypeInfo structTypeInfo = null; + try { + structTypeInfo = new ParquetToHiveSchemaConverter(tbl).convert( + ParquetSchemaReader.read(parquetFile)); + } catch (IOException ioe) { + LOG.error(ioe.getMessage(), ioe); + } catch (UnsupportedOperationException ue) { + LOG.error(ue.getMessage(), ue); + } catch (RuntimeException ex) { + LOG.error(ex.getMessage(), ex); + } + if (structTypeInfo == null) { + columnNames = new ArrayList(); + columnTypes = new ArrayList(); + } else { + columnNames = structTypeInfo.getAllStructFieldNames(); + columnTypes = structTypeInfo.getAllStructFieldTypeInfos(); + } + } } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } } if (columnNames.size() != columnTypes.size()) { - throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + - "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + - columnTypes); + LOG.error("ParquetHiveSerde initialization failed. Number of column name and column type " + + "differs. 
columnNames = " + columnNames + ", columnTypes = " + columnTypes); + columnNames = new ArrayList(); + columnTypes = new ArrayList(); } // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); @@ -172,4 +224,42 @@ public SerDeStats getSerDeStats() { } return stats; } + + private Path getParquetFile(Configuration conf, Path loc) { + if (loc == null) { + return null; + } + + Path parquetFile; + try { + parquetFile = getAFile(FileSystem.get(new URI(loc.toString()), conf), loc); + } catch (Exception e) { + LOG.error("Unable to read file from " + loc + ": " + e, e); + parquetFile = null; + } + + return parquetFile; + } + + private Path getAFile(FileSystem fs, Path path) throws IOException { + FileStatus status = fs.getFileStatus(path); + + if (status.isFile()) { + if (status.getLen() > 0) { + return path; + } else { + return null; + } + } + + for(FileStatus childStatus: fs.listStatus(path)) { + Path file = getAFile(fs, childStatus.getPath()); + + if (file != null) { + return file; + } + } + + return null; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 39c48b5..951c3ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -691,6 +691,8 @@ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { if (tbl.getDbName() == null || "".equals(tbl.getDbName().trim())) { tbl.setDbName(SessionState.get().getCurrentDatabase()); } + + setTableLocInTableProperties(tbl); if (tbl.getCols().size() == 0 || tbl.getSd().getColsSize() == 0) { tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(), tbl.getDeserializer())); @@ -721,6 +723,30 @@ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { } } + private void setTableLocInTableProperties(Table tbl) throws TException { + tbl.getTTable().getSd().setLocation(getTablePath(tbl)); + } + + private String getTablePath(Table table) throws TException { + Warehouse wh = new Warehouse(conf); + Path tablePath; + + final String location = table.getSd().getLocation(); + if ((location == null || location.isEmpty())) { + tablePath = wh.getTablePath( + getMSC().getDatabase(table.getDbName()), table.getTableName()); + } else { + tablePath = wh.getDnsPath(new Path(location)); + } + + if (tablePath != null) { + LOG.info("Table path is: " + tablePath); + return tablePath.toString(); + } else { + return null; + } + } + /** * * @param tableName diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java new file mode 100644 index 0000000..a5d93f9 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java @@ -0,0 +1,410 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import parquet.schema.ConversionPatterns; +import parquet.schema.GroupType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Type; +import parquet.schema.Types; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +public class TestParquetToHiveSchemaConverter { + + static ParquetToHiveSchemaConverter parquetToHiveSchemaConverter; + + @BeforeClass + public static void setUp() { + parquetToHiveSchemaConverter = new ParquetToHiveSchemaConverter(); + } + + @Test + public void testUtf8() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("utf8"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("utf8"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.stringTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testEnum() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.ENUM). + named("enum"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("enum"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.stringTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testBinary() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + named("binary"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("binary"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.binaryTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testBoolean() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.BOOLEAN). + named("boolean"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("boolean"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.booleanTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testDouble() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.DOUBLE). + named("double"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("double"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.doubleTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testFixedLenByteArray() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY). + length(8). + named("fixedLenByteArray"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("fixedLenByteArray"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.binaryTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testFloat() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.FLOAT). + named("float").
+ named("top"); + + ArrayList names = new ArrayList(); + names.add("float"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.floatTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt32() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.INT32). + named("int32"). + named("top"); + + + ArrayList names = new ArrayList(); + names.add("int32"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.intTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt64() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.INT64). + named("int64"). + named("top"); + + ArrayList names = new ArrayList(); + names.add("int64"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.longTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt96() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.INT96). + named("int96"). + named("top"); + + ArrayList names = new ArrayList(); + names.add("int96"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.timestampTypeInfo); + + Properties props = new Properties(); + props.setProperty("parquet.int96.is.timestamp", ""); + + test(groupType, names, typeInfos, new ParquetToHiveSchemaConverter(props)); + } + + @Test + public void testInt96Negative() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.INT96). + named("int96"). + named("top"); + + ArrayList names = new ArrayList(); + names.add("int96"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.timestampTypeInfo); + + try { + test(groupType, names, typeInfos); + Assert.fail("Failed to throw UnsupportedOperationException for INT96"); + } catch (UnsupportedOperationException use) { + // It's good! + } + } + + @Test + public void primitiveOptional() { + GroupType groupType = Types.optionalGroup(). + optional(PrimitiveType.PrimitiveTypeName.INT32). + named("int32"). + named("top"); + + ArrayList names = new ArrayList(); + names.add("int32"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.intTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void listOfPrimitives() { + GroupType groupType = Types.requiredGroup(). + repeated(PrimitiveType.PrimitiveTypeName.INT32). + named("intlist"). + named("top"); + + ArrayList names = new ArrayList(); + names.add("intlist"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo)); + + test(groupType, names, typeInfos); + } + + @Test + public void listOfStruct() { + GroupType groupType = Types.requiredGroup(). + repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("string"). + required(PrimitiveType.PrimitiveTypeName.FLOAT). + named("float"). + named("structlist"). 
+ named("top"); + + ArrayList names = new ArrayList(); + names.add("structlist"); + final List structTypeInfos = new ArrayList(); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.floatTypeInfo); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getStructTypeInfo(Arrays.asList + ("string", "float"), structTypeInfos))); + + test(groupType, names, typeInfos); + } + + @Test + public void structOfPrimitives() { + GroupType groupType = Types.requiredGroup(). + requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("utf8"). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.ENUM). + named("enum"). + required(PrimitiveType.PrimitiveTypeName.BINARY). + named("binary"). + required(PrimitiveType.PrimitiveTypeName.BOOLEAN). + named("boolean"). + required(PrimitiveType.PrimitiveTypeName.DOUBLE). + named("double"). + required(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY). + length(8). + named("fixedLenByteArray"). + required(PrimitiveType.PrimitiveTypeName.FLOAT). + named("float"). + required(PrimitiveType.PrimitiveTypeName.INT32). + named("int32"). + required(PrimitiveType.PrimitiveTypeName.INT64). + named("int64"). + named("struct"). + named("top"); + + ArrayList names = new ArrayList(); + names.add("struct"); + final List structTypeInfos = new ArrayList(); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.binaryTypeInfo); + structTypeInfos.add(TypeInfoFactory.booleanTypeInfo); + structTypeInfos.add(TypeInfoFactory.doubleTypeInfo); + structTypeInfos.add(TypeInfoFactory.binaryTypeInfo); + structTypeInfos.add(TypeInfoFactory.floatTypeInfo); + structTypeInfos.add(TypeInfoFactory.intTypeInfo); + structTypeInfos.add(TypeInfoFactory.longTypeInfo); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.getStructTypeInfo(Arrays.asList + ("utf8", "enum", "binary", "boolean", "double", "fixedLenByteArray", "float", + "int32", "int64"), + structTypeInfos)); + + test(groupType, names, typeInfos); + } + + @Test + public void nestedStruct() { + GroupType groupType = Types.requiredGroup(). + requiredGroup(). + requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.INT32). + named("int"). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("string"). + named("innerstruct"). + named("outerstruct"). + named("top"); + + final ArrayList typeInfos = new ArrayList(Arrays.asList( + TypeInfoFactory.getStructTypeInfo(Arrays.asList("innerstruct"), Arrays.asList( + TypeInfoFactory.getStructTypeInfo(new ArrayList(Arrays.asList("int", "string")), + new ArrayList(Arrays.asList( + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo))) + )) + )); + + test(groupType, new ArrayList(Arrays.asList("outerstruct")), typeInfos); + } + + @Test + public void map() { + GroupType groupType = Types.requiredGroup(). 
addField( + ConversionPatterns.mapType( + Type.Repetition.REQUIRED, + "map", + Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("key"), + Types.required(PrimitiveType.PrimitiveTypeName.DOUBLE).named("value")) + ).named("top"); + + + final ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(Arrays.asList( + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.doubleTypeInfo) + )); + + test(groupType, new ArrayList<String>(Arrays.asList("map")), typeInfos); + } + + @Test + public void nestedMap() { + + GroupType groupType = Types.requiredGroup(). + addField( + ConversionPatterns.mapType( + Type.Repetition.REQUIRED, + "map", + Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("key"), + ConversionPatterns.mapType( + Type.Repetition.REQUIRED, + "value", + Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named + ("key"), + Types.required(PrimitiveType.PrimitiveTypeName.INT64).named("value") + ) + ) + ).named("top"); + + final ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(Arrays.asList( + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.longTypeInfo)) + )); + + test(groupType, new ArrayList<String>(Arrays.asList("map")), typeInfos); + } + + private void test(GroupType groupType, ArrayList<String> names, ArrayList<TypeInfo> typeInfos) { + test(groupType, names, typeInfos, parquetToHiveSchemaConverter); + } + + private void test(GroupType groupType, ArrayList<String> names, ArrayList<TypeInfo> typeInfos, + ParquetToHiveSchemaConverter converter) { + StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldNames(names); + structTypeInfo.setAllStructFieldTypeInfos(typeInfos); + + final StructTypeInfo actualTypeInfo = converter.convert(groupType); + Assert.assertEquals(structTypeInfo, actualTypeInfo); + } +} \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q new file mode 100644 index 0000000..bb3872e --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_multi_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/MultiFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_multi_field_structs_gen_schema; + +CREATE EXTERNAL TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_multi_field_structs_gen_schema'; + +SELECT * FROM parquet_array_of_multi_field_structs_gen_schema; + +DROP TABLE parquet_array_of_multi_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q new file mode 100644 index 0000000..21cc9e2 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_optional_elements_gen_schema; +dfs -cp
${system:hive.root}/data/files/NewOptionalGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_optional_elements_gen_schema; + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_optional_elements_gen_schema'; + +SELECT * FROM parquet_array_of_optional_elements_gen_schema; + +DROP TABLE parquet_array_of_optional_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q new file mode 100644 index 0000000..c71cbe3 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_required_elements_gen_schema; +dfs -cp ${system:hive.root}/data/files/NewRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_required_elements_gen_schema; + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_required_elements_gen_schema'; + +SELECT * FROM parquet_array_of_required_elements_gen_schema; + +DROP TABLE parquet_array_of_required_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..2de661c --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_ambiguous_array_of_single_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/SingleFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_ambiguous_array_of_single_field_structs_gen_schema; + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_ambiguous_array_of_single_field_structs_gen_schema'; + +SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q new file mode 100644 index 0000000..1ae564c --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/HiveRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_structs_gen_schema; + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_structs_gen_schema'; + +SELECT * FROM parquet_array_of_structs_gen_schema; + +DROP TABLE parquet_array_of_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q new file mode 100644 index 0000000..17a6e22 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp; +dfs -cp ${system:hive.root}/data/files/HiveRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp; + +CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp'; + +SELECT * FROM parquet_array_of_structs_gen_schema_ext; + +DROP TABLE parquet_array_of_structs_gen_schema_ext; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q new file mode 100644 index 0000000..30078c9 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_unannotated_groups_gen_schema; +dfs -cp ${system:hive.root}/data/files/UnannotatedListOfGroups.parquet ${system:test.tmp.dir}/parquet_array_of_unannotated_groups_gen_schema; + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_unannotated_groups_gen_schema'; + +SELECT * FROM parquet_array_of_unannotated_groups_gen_schema; + +DROP TABLE parquet_array_of_unannotated_groups_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q new file mode 100644 index 0000000..29aa95a --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_unannotated_ints_gen_schema; +dfs -cp ${system:hive.root}/data/files/UnannotatedListOfPrimitives.parquet ${system:test.tmp.dir}/parquet_array_of_unannotated_ints_gen_schema; + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + 
'${system:test.tmp.dir}/parquet_array_of_unannotated_ints_gen_schema'; + +SELECT * FROM parquet_array_of_unannotated_ints_gen_schema; + +DROP TABLE parquet_array_of_unannotated_ints_gen_schema; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..ee64e43 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_avro_array_of_primitives_gen_schema; +dfs -cp ${system:hive.root}/data/files/AvroPrimitiveInList.parquet ${system:test.tmp.dir}/parquet_avro_array_of_primitives_gen_schema; + +CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_avro_array_of_primitives_gen_schema'; + +SELECT * FROM parquet_avro_array_of_primitives_gen_schema; + +DROP TABLE parquet_avro_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..2dbb9a5 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_avro_array_of_single_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/AvroSingleFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_avro_array_of_single_field_structs_gen_schema; + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_avro_array_of_single_field_structs_gen_schema'; + +SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q new file mode 100644 index 0000000..1176126 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q @@ -0,0 +1,18 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_decimal_gen_schema; +dfs -cp ${system:hive.root}/data/files/dec.parq ${system:test.tmp.dir}/parquet_decimal_gen_schema; + +CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_decimal_gen_schema'; + +DESCRIBE FORMATTED parquet_decimal_gen_schema; + +SELECT * FROM parquet_decimal_gen_schema; + +DROP TABLE parquet_decimal_gen_schema; \ No newline at end of file diff --git 
ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..1f83684 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_thrift_array_of_primitives_gen_schema; +dfs -cp ${system:hive.root}/data/files/ThriftPrimitiveInList.parquet ${system:test.tmp.dir}/parquet_thrift_array_of_primitives_gen_schema; + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_thrift_array_of_primitives_gen_schema'; + +SELECT * FROM parquet_thrift_array_of_primitives_gen_schema; + +DROP TABLE parquet_thrift_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..4cbd95c --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_thrift_array_of_single_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/ThriftSingleFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_thrift_array_of_single_field_structs_gen_schema; + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_thrift_array_of_single_field_structs_gen_schema'; + +SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema; diff --git ql/src/test/results/clientpositive/create_view_partitioned.q.out ql/src/test/results/clientpositive/create_view_partitioned.q.out index ebf9a6b..e1bf0ff 100644 --- ql/src/test/results/clientpositive/create_view_partitioned.q.out +++ ql/src/test/results/clientpositive/create_view_partitioned.q.out @@ -175,12 +175,16 @@ POSTHOOK: query: SHOW TABLE EXTENDED LIKE vp1 POSTHOOK: type: SHOW_TABLESTATUS tableName:vp1 #### A masked pattern was here #### -location:null inputformat:org.apache.hadoop.mapred.SequenceFileInputFormat outputformat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat columns:struct columns { string key} partitioned:true partitionColumns:struct partition_columns { string value} +totalNumberFiles:0 +totalFileSize:0 +maxFileSize:0 +minFileSize:0 +#### A masked pattern was here #### PREHOOK: query: SHOW TABLE EXTENDED LIKE vp1 PARTITION(value='val_86') PREHOOK: type: SHOW_TABLESTATUS @@ -188,12 +192,16 @@ POSTHOOK: query: SHOW TABLE EXTENDED LIKE vp1 PARTITION(value='val_86') POSTHOOK: type: SHOW_TABLESTATUS tableName:vp1 #### A masked pattern was here #### -location:null inputformat:org.apache.hadoop.mapred.SequenceFileInputFormat outputformat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat columns:struct 
columns { string key} partitioned:true partitionColumns:struct partition_columns { string value} +totalNumberFiles:0 +totalFileSize:0 +maxFileSize:0 +minFileSize:0 +#### A masked pattern was here #### PREHOOK: query: ALTER VIEW vp1 DROP PARTITION (value='val_xyz') diff --git ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out new file mode 100644 index 0000000..4c02ebe --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out new file mode 100644 index 0000000..0de5b65 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: 
database:default
+PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_array_of_optional_elements_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
+PREHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+#### A masked pattern was here ####
+[{"latitude":0.0,"longitude":0.0},null,{"latitude":0.0,"longitude":180.0}]
+PREHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
+POSTHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out
new file mode 100644
index 0000000..4303f0f
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_array_of_required_elements_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_array_of_required_elements_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema
+PREHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema
+#### A masked pattern was here ####
+[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}]
+PREHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema
+PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema
+POSTHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema
+POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out
new file mode 100644
index 0000000..e9ca9ab
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
+PREHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+[1234,2345]
+PREHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
+PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
+POSTHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
+POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out
new file mode 100644
index 0000000..c9b1f3f
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_array_of_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_array_of_structs_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_array_of_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_structs_gen_schema
+PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_structs_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_structs_gen_schema
+#### A masked pattern was here ####
+[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}]
+PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_structs_gen_schema
+PREHOOK: Output: default@parquet_array_of_structs_gen_schema
+POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_structs_gen_schema
+POSTHOOK: Output: default@parquet_array_of_structs_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out
new file mode 100644
index 0000000..30acdae
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext
+POSTHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext
+PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_structs_gen_schema_ext
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext
+#### A masked pattern was here ####
+[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}]
+PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema_ext
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_structs_gen_schema_ext
+PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext
+POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema_ext
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext
+POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext
diff --git ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out
new file mode 100644
index 0000000..dd738dd
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_array_of_unannotated_groups_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_array_of_unannotated_groups_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema
+PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema
+#### A masked pattern was here ####
+[{"x":1.0,"y":1.0},{"x":2.0,"y":2.0}]
+PREHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema
+PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema
+POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema
+POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out
new file mode 100644
index 0000000..14b52ba
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_array_of_unannotated_ints_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_array_of_unannotated_ints_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema
+PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema
+#### A masked pattern was here ####
+[34,35,36]
+PREHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema
+PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema
+POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema
+POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out
new file mode 100644
index 0000000..2cad677
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema
+PREHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema
+#### A masked pattern was here ####
+[34,35,36]
+PREHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema
+PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema
+POSTHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema
+POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out
new file mode 100644
index 0000000..c285687
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema
+PREHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+[{"count":1234},{"count":2345}]
+PREHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema
+PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema
+POSTHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema
+POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out
new file mode 100644
index 0000000..90a3266
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out
@@ -0,0 +1,88 @@
+PREHOOK: query: CREATE TABLE parquet_decimal_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_decimal_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_decimal_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_decimal_gen_schema
+PREHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_decimal_gen_schema
+POSTHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_decimal_gen_schema
+# col_name              data_type               comment
+
+name                    string
+value                   decimal(5,2)
+
+# Detailed Table Information
+Database:               default
+#### A masked pattern was here ####
+Protect Mode:           None
+Retention:              0
+#### A masked pattern was here ####
+Table Type:             MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	false
+	numFiles            	0
+	numRows             	-1
+	rawDataSize         	-1
+	totalSize           	0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:          org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
+InputFormat:            org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+OutputFormat:           org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
+Compressed:             No
+Num Buckets:            -1
+Bucket Columns:         []
+Sort Columns:           []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: SELECT * FROM parquet_decimal_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_decimal_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_decimal_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_decimal_gen_schema
+#### A masked pattern was here ####
+Tom 234.79
+Beck 77.34
+Snow 55.71
+Mary 4.33
+Cluck 5.96
+Tom 12.25
+Mary 33.33
+Tom 0.19
+Beck 3.15
+Beck 7.99
+PREHOOK: query: DROP TABLE parquet_decimal_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_decimal_gen_schema
+PREHOOK: Output: default@parquet_decimal_gen_schema
+POSTHOOK: query: DROP TABLE parquet_decimal_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_decimal_gen_schema
+POSTHOOK: Output: default@parquet_decimal_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out
new file mode 100644
index 0000000..c7a21dd
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_thrift_array_of_primitives_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_thrift_array_of_primitives_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+#### A masked pattern was here ####
+[34,35,36]
+PREHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out
new file mode 100644
index 0000000..0218f25
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+[{"count":1234},{"count":2345}]
+PREHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
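Note on the expected outputs above: every CREATE TABLE in these q.out files omits the column list, and the columns (for example, name string / value decimal(5,2) reported by DESCRIBE FORMATTED on parquet_decimal_gen_schema) are inferred from the footer of the Parquet file under the masked LOCATION. A minimal sketch of that footer lookup, using the ParquetSchemaReader this patch introduces; the file path below is a hypothetical stand-in for a test data file, not something shipped with the patch:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetSchemaReader;
    import parquet.schema.GroupType;

    public class FooterSchemaSketch {
      public static void main(String[] args) throws Exception {
        // Hypothetical data file; the q-tests point LOCATION at files like this.
        Path parquetFile = new Path("/tmp/parquet_decimal_gen_schema/data.parquet");

        // Reads the Parquet footer and returns the file's group type, which is
        // the schema the SerDe converts into Hive column names and types.
        GroupType schema = ParquetSchemaReader.read(parquetFile);
        System.out.println(schema);
      }
    }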