diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index fafd78e..6123c11 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -759,7 +759,7 @@ "org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", "SerDes retriving schema from metastore. This an internal parameter. Check with the hive dev. team"), HIVEHISTORYFILELOC("hive.querylog.location", diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 2db2658..5f5a7f0 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -366,11 +366,17 @@ static public Deserializer getDeserializer(Configuration conf, try { Deserializer deserializer = ReflectionUtils.newInstance(conf.getClassByName(lib). 
asSubclass(Deserializer.class), conf); + + final Properties tableMetadata = MetaStoreUtils.getTableMetadata(table); + if (tableMetadata.getProperty("location", null) == null && + table.getParameters().containsKey("location")) { + tableMetadata.setProperty("location", table.getParameters().get("location")); + } if (skipConfError) { SerDeUtils.initializeSerDeWithoutErrorCheck(deserializer, conf, - MetaStoreUtils.getTableMetadata(table), null); + tableMetadata, null); } else { - SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null); + SerDeUtils.initializeSerDe(deserializer, conf, tableMetadata, null); } return deserializer; } catch (RuntimeException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java new file mode 100644 index 0000000..1884878 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java @@ -0,0 +1,36 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import parquet.hadoop.ParquetFileReader; +import parquet.hadoop.metadata.ParquetMetadata; +import parquet.schema.GroupType; + +import java.io.IOException; +/** Reads the schema recorded in the footer of a Parquet file; read() rethrows IOException unchecked. */ +public class ParquetSchemaReader { + public static GroupType read(String parquetFilePath) { + // A default Configuration is created here; no caller-supplied configuration is consulted. + Configuration conf = new Configuration(); + ParquetMetadata metaData; + try { + metaData = ParquetFileReader.readFooter(conf, new Path(parquetFilePath)); + } catch (IOException e) { + throw new RuntimeException("Unable to read Parquet footer from " + parquetFilePath, e); + } + return metaData.getFileMetaData().getSchema(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java new file mode 100644 index 0000000..ee77447 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java @@ -0,0 +1,202 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import parquet.schema.DecimalMetadata; +import parquet.schema.GroupType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Type; + +import java.util.ArrayList; +import java.util.List; +/** Converts a Parquet schema into the Hive StructTypeInfo describing the same fields. */ +public class ParquetToHiveSchemaConverter { + + private static final Log LOG = LogFactory.getLog(ParquetToHiveSchemaConverter.class); + /** Converts the fields of the given Parquet group into a Hive struct type. */ + public StructTypeInfo convert(GroupType parquetSchema) { + return convertFields(parquetSchema.getFields()); + } + /** Maps every field to a Hive type; a REPEATED field is turned into a Hive list. */ + private StructTypeInfo convertFields(List<Type> parquetFields) { + StructTypeInfo structTypeInfo = new StructTypeInfo(); + ArrayList<String> names = new ArrayList<String>(); + ArrayList<TypeInfo> types = new ArrayList<TypeInfo>(); + + for (Type parquetType : parquetFields) { + + TypeInfo type; + if (parquetType.isRepetition(Type.Repetition.REPEATED)) { + type = createHiveArray(parquetType, ""); + } else { + type = convertField(parquetType); + } + + names.add(parquetType.getName()); + types.add(type); + } + + structTypeInfo.setAllStructFieldNames(names); + structTypeInfo.setAllStructFieldTypeInfos(types); + + LOG.info("Generated Hive's StructTypeInfo from parquet schema is: " + structTypeInfo); + + return structTypeInfo; + } + /** Converts one Parquet field (primitive or group) to the matching Hive type. */ + private TypeInfo convertField(final Type parquetType) { + if (parquetType.isPrimitive()) { + final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = + parquetType.asPrimitiveType().getPrimitiveTypeName(); + final OriginalType originalType = parquetType.getOriginalType(); + return parquetPrimitiveTypeName.convert( + new PrimitiveType.PrimitiveTypeNameConverter<TypeInfo, RuntimeException>() { + @Override + public TypeInfo convertBOOLEAN(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + 
return TypeInfoFactory.booleanTypeInfo; + } + + @Override + public TypeInfo convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.intTypeInfo; + } + + @Override + public TypeInfo convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.longTypeInfo; + } + + @Override + public TypeInfo convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + throw new IllegalArgumentException("INT96 not yet implemented."); + } + + @Override + public TypeInfo convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.floatTypeInfo; + } + + @Override + public TypeInfo convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.doubleTypeInfo; + } + + @Override + public TypeInfo convertFIXED_LEN_BYTE_ARRAY(PrimitiveType.PrimitiveTypeName + primitiveTypeName) { + return TypeInfoFactory.binaryTypeInfo; + } + + @Override + public TypeInfo convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + if (originalType == OriginalType.UTF8 || originalType == OriginalType.ENUM) { + return TypeInfoFactory.stringTypeInfo; + } else if (originalType == OriginalType.DECIMAL) { + final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType() + .getDecimalMetadata(); + return TypeInfoFactory.getDecimalTypeInfo(decimalMetadata.getPrecision(), + decimalMetadata.getScale()); + } else { + return TypeInfoFactory.binaryTypeInfo; + } + } + }); + } else { + GroupType parquetGroupType = parquetType.asGroupType(); + OriginalType originalType = parquetGroupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case LIST: + if (parquetGroupType.getFieldCount() != 1) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + Type elementType = parquetGroupType.getType(0); + if (!elementType.isRepetition(Type.Repetition.REPEATED)) { + throw new UnsupportedOperationException("Invalid list type " + 
parquetGroupType); + } + return createHiveArray(elementType, parquetGroupType.getName()); + case MAP: + if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); + if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || + mapKeyValType.getOriginalType() != OriginalType.MAP_KEY_VALUE || // != also rejects a null annotation + mapKeyValType.getFieldCount() != 2) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + Type keyType = mapKeyValType.getType(0); + if (!keyType.isPrimitive() || + !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType + .PrimitiveTypeName.BINARY) || + keyType.getOriginalType() != OriginalType.UTF8) { // != also rejects a null annotation + throw new IllegalArgumentException("Map key type must be binary (UTF8): " + + keyType); + } + Type valueType = mapKeyValType.getType(1); + return createHiveMap(convertField(keyType), convertField(valueType)); + case ENUM: + return TypeInfoFactory.stringTypeInfo; + case MAP_KEY_VALUE: + case UTF8: + default: + throw new UnsupportedOperationException("Cannot convert Parquet type " + + parquetType); + } + } else { + // if no original type then it's a record + return createHiveStruct(parquetGroupType.getFields()); + } + } + } + /** Builds a Hive struct whose members mirror the given fields, in order. */ + private TypeInfo createHiveStruct(List<Type> parquetFields) { + List<String> names = new ArrayList<String>(); + List<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + + for (Type field: parquetFields) { + names.add(field.getName()); + typeInfos.add(convertField(field)); + } + + return TypeInfoFactory.getStructTypeInfo(names, typeInfos); + } + /** Builds a Hive map type from already converted key and value types. */ + private TypeInfo createHiveMap(TypeInfo keyType, TypeInfo valueType) { + return TypeInfoFactory.getMapTypeInfo(keyType, valueType); + } + /** Builds a Hive list whose element is the primitive, a struct of the group, or its only field. */ + private TypeInfo createHiveArray(Type elementType, String elementName) { + if (elementType.isPrimitive()) { + return 
TypeInfoFactory.getListTypeInfo(convertField(elementType)); + } else { + final GroupType groupType = elementType.asGroupType(); + final List<Type> groupFields = groupType.getFields(); + if (groupFields.size() > 1 || + (groupFields.size() == 1 && + (elementType.getName().equals("array") || + elementType.getName().equals(elementName + "_tuple")))) { + return TypeInfoFactory.getListTypeInfo(createHiveStruct(groupFields)); + } else { + return TypeInfoFactory.getListTypeInfo(convertField(groupType.getFields().get(0))); + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index 4effe73..793674d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -13,7 +13,9 @@ */ package org.apache.hadoop.hive.ql.io.parquet.serde; +import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -21,8 +23,15 @@ import java.util.Map.Entry; import java.util.Properties; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetSchemaReader; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; @@ -94,6 +103,8 @@ } } + private static final String PARQUET_FILE = "parquet.file"; + private SerDeStats stats; private ObjectInspector objInspector; @@ -107,6 +118,7 @@ private 
long serializedSize; private long deserializedSize; private String compressionType; + private static final Log LOG = LogFactory.getLog(ParquetHiveSerDe.class); @Override public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { @@ -121,15 +133,34 @@ public final void initialize(final Configuration conf, final Properties tbl) thr // Get compression properties compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION); - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); + if (columnNameProperty.length() == 0 && columnTypeProperty.length() == 0) { + String parquetFile = getParquetFile(conf, tbl.getProperty(PARQUET_FILE, null)); + + if (parquetFile == null) { + parquetFile = getParquetFile(conf, tbl.getProperty("location", null)); + } + + if (parquetFile == null) { + throw new RuntimeException("Either provide schema for table or point to parquet file " + + "using " + PARQUET_FILE + " in tblproperties or make sure that table has atleast one " + + "parquet file with required metadata"); + } + + StructTypeInfo structTypeInfo = new ParquetToHiveSchemaConverter().convert( + ParquetSchemaReader.read(parquetFile)); + columnNames = structTypeInfo.getAllStructFieldNames(); + columnTypes = structTypeInfo.getAllStructFieldTypeInfos(); } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException("ParquetHiveSerde 
initialization failed. Number of column " + @@ -330,4 +361,44 @@ public SerDeStats getSerDeStats() { } return stats; } + /** Returns a non-empty file at or under loc; null if loc is null, unreadable or holds none. */ + private String getParquetFile(Configuration conf, String loc) { + if (loc == null) { + return null; + } + + String parquetFile; + try { + parquetFile = getAFile(FileSystem.get(new URI(loc), conf), loc); + } catch (Exception e) { + // Best effort: a null result lets the caller try another location or report the failure. + LOG.warn("Unable to read file from " + loc, e); + parquetFile = null; + } + + return parquetFile; + } + /** Depth-first search for the first file with length > 0 at or under loc; null if none. */ + private String getAFile(FileSystem fs, String loc) throws IOException { + final Path path = new Path(loc); + FileStatus status = fs.getFileStatus(path); + + if (status.isFile()) { + if (status.getLen() > 0) { + return loc; + } else { + return null; + } + } + + for(FileStatus childStatus: fs.listStatus(path)) { + String file = getAFile(fs, childStatus.getPath().toString()); + + if (file != null) { + return file; + } + } + + return null; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index cd3d349..de9d161 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -653,6 +653,7 @@ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { if (tbl.getDbName() == null || "".equals(tbl.getDbName().trim())) { tbl.setDbName(SessionState.get().getCurrentDatabase()); } + setTableLocInTableProperties(tbl); if (tbl.getCols().size() == 0) { tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(), tbl.getDeserializer())); @@ -684,6 +685,47 @@ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { } /** + * Store table's path in table's Table properties. + * Serdes like ParquetHiveSerDe need table path to initialize. + * On the other hand, serdes like HBaseSerde explicitly need the + * location to be null. So, location can not be saved in table's + * storage descriptor. 
+ * @param tbl table whose parameters receive the "location" entry + * @throws TException propagated from getTablePath + */ + private void setTableLocInTableProperties(Table tbl) throws TException { + tbl.getTTable().putToParameters("location", getTablePath(tbl)); + } + /** Returns the warehouse default path, or the DNS-qualified SD location; "" if unresolved. */ + private String getTablePath(Table table) throws TException { + Warehouse wh = new Warehouse(conf); + Path tablePath = null; + + if ((table.getSd().getLocation() == null + || table.getSd().getLocation().isEmpty())) { + tablePath = wh.getTablePath( + getMSC().getDatabase(table.getDbName()), table.getTableName()); + } else { + if (!MetaStoreUtils.isExternalTable(table.getTTable()) && !MetaStoreUtils.isNonNativeTable + (table.getTTable())) { + LOG.warn("Location: " + table.getSd().getLocation() + + " specified for non-external table:" + table.getTableName()); + } + final String location = table.getSd().getLocation(); + if (location != null) { + tablePath = wh.getDnsPath(new Path(location)); + } + } + + if (tablePath != null) { + LOG.info("Table path is: " + tablePath); + return tablePath.toString(); + } else { + return ""; + } + } + + /** * * @param tableName * table name diff --git ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q new file mode 100644 index 0000000..affcc5f --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE 
parquet_array_of_multi_field_structs_gen_schema; + +SELECT * FROM parquet_array_of_multi_field_structs_gen_schema; + +DROP TABLE parquet_array_of_multi_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q new file mode 100644 index 0000000..73e93e7 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema; + +SELECT * FROM parquet_array_of_optional_elements_gen_schema; + +DROP TABLE parquet_array_of_optional_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q new file mode 100644 index 0000000..b3c26d3 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES 
('parquet.file'='../../data/files/NewRequiredGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema; + +SELECT * FROM parquet_array_of_required_elements_gen_schema; + +DROP TABLE parquet_array_of_required_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..7612dc9 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,19 @@ +-- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This is verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q new file mode 100644 index 0000000..05c9f63 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + 
+CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema; + +SELECT * FROM parquet_array_of_structs_gen_schema; + +DROP TABLE parquet_array_of_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q new file mode 100644 index 0000000..17a6e22 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp; +dfs -cp ${system:hive.root}/data/files/HiveRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp; + +CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp'; + +SELECT * FROM parquet_array_of_structs_gen_schema_ext; + +DROP TABLE parquet_array_of_structs_gen_schema_ext; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q new file mode 100644 index 0000000..3f356c4 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q @@ 
-0,0 +1,18 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema; + +SELECT * FROM parquet_array_of_unannotated_groups_gen_schema; + +DROP TABLE parquet_array_of_unannotated_groups_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q new file mode 100644 index 0000000..0549808 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema; + +SELECT * FROM parquet_array_of_unannotated_ints_gen_schema; + +DROP TABLE parquet_array_of_unannotated_ints_gen_schema; \ No newline at end of file diff --git 
ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..51eb6fd --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q @@ -0,0 +1,15 @@ +CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema; + +SELECT * FROM parquet_avro_array_of_primitives_gen_schema; + +DROP TABLE parquet_avro_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..51dba64 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE 
parquet_avro_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q new file mode 100644 index 0000000..7d39d36 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q @@ -0,0 +1,16 @@ +CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq'); + +DESCRIBE FORMATTED parquet_decimal_gen_schema; + +LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema; + +SELECT * FROM parquet_decimal_gen_schema; + +DROP TABLE parquet_decimal_gen_schema; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..0fcc356 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE 
parquet_thrift_array_of_primitives_gen_schema; + +SELECT * FROM parquet_thrift_array_of_primitives_gen_schema; + +DROP TABLE parquet_thrift_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..1646118 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema; diff --git ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out new file mode 100644 index 0000000..ffe77ff --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here 
#### +[{"latitude":0.0,"longitude":0.0},{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out new file mode 100644 index 0000000..69e920f --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES 
('parquet.file'='../../data/files/NewOptionalGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},null,{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out new file mode 100644 index 0000000..6129468 --- 
/dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema 
+PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..e5adf8a --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,57 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This is verifies compliance with the spec for this case. 
+ +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This is verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: query: 
SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[1234,2345] +PREHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out new file mode 100644 index 0000000..d502ee1 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table 
with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema diff --git 
ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out new file mode 100644 index 0000000..30acdae --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema_ext +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@parquet_array_of_structs_gen_schema_ext +PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema_ext +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext diff --git ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out new file mode 100644 index 0000000..b7faa1d --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +[{"x":1.0,"y":1.0},{"x":2.0,"y":2.0}] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out new file mode 100644 index 0000000..474a0ca --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates 
a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: 
default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out new file mode 100644 index 0000000..dc7be20 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out @@ -0,0 +1,49 @@ +PREHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..5aad90e --- 
/dev/null +++ ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked 
pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out new file mode 100644 index 0000000..e3ab207 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_decimal_gen_schema +PREHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +POSTHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@parquet_decimal_gen_schema +# col_name data_type comment + +name string +value decimal(5,2) + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + parquet.file ../../data/files/dec.parq +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_decimal_gen_schema +PREHOOK: query: SELECT * FROM parquet_decimal_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: 
default@parquet_decimal_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_decimal_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_decimal_gen_schema +#### A masked pattern was here #### +Tom 234.79 +Beck 77.34 +Snow 55.71 +Mary 4.33 +Cluck 5.96 +Tom 12.25 +Mary 33.33 +Tom 0.19 +Beck 3.15 +Beck 7.99 +PREHOOK: query: DROP TABLE parquet_decimal_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: DROP TABLE parquet_decimal_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_decimal_gen_schema +POSTHOOK: Output: default@parquet_decimal_gen_schema diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out new file mode 100644 index 0000000..79c9e05 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out 
ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..ec476bb --- /dev/null +++ ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema