diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index fafd78e..6123c11 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -759,7 +759,7 @@ "org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", "SerDes retriving schema from metastore. This an internal parameter. Check with the hive dev. team"), HIVEHISTORYFILELOC("hive.querylog.location", diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ColInfoFromParquetFile.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ColInfoFromParquetFile.java new file mode 100644 index 0000000..57cd000 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ColInfoFromParquetFile.java @@ -0,0 +1,238 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import parquet.hadoop.ParquetFileReader; +import parquet.hadoop.metadata.FileMetaData; +import parquet.hadoop.metadata.ParquetMetadata; +import parquet.schema.DecimalMetadata; +import parquet.schema.GroupType; +import parquet.schema.MessageType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Type; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class ColInfoFromParquetFile { + + private static final Log LOG = LogFactory.getLog(ColInfoFromParquetFile.class); + + /** + * Generates hive schema from a parquet file + * + * @param parquetFile Parquet file from which schema has to be read + * @return Pair containing list of hive column names and ":" separated hive data types + */ + public Pair<List<String>, String> convert(String parquetFile) { + Configuration conf = new Configuration(); + ParquetMetadata metaData; + try { + metaData = ParquetFileReader.readFooter(conf, new Path(parquetFile)); + } catch (IOException e) { + throw new RuntimeException(e); + } + MessageType schema = metaData.getFileMetaData().getSchema(); + + FileMetaData fileMetaData = metaData.getFileMetaData(); + LOG.info("FileMetaData: " + fileMetaData); + + List<String> colNames = new ArrayList<String>(); + StringBuffer colTypes = new StringBuffer(); + String hiveSchema = convert(colNames, colTypes, schema); + + LOG.info("Generated hive schema is " + hiveSchema); + + return new ImmutablePair<List<String>, String>(colNames, + colTypes.substring(0, colTypes.length() - 1)); + } + + private String convert(List<String> colNames, StringBuffer colTypes, MessageType parquetSchema) { + return convertFields(colNames, colTypes, 
parquetSchema.getFields()); + } + + private String convertFields(List<String> colNames, StringBuffer colTypes, + List<Type> parquetFields) { + StringBuilder hiveSchema = new StringBuilder(); + for (Type parquetType : parquetFields) { + + String fieldSchema; + if (parquetType.isRepetition(Type.Repetition.REPEATED)) { + fieldSchema = createHiveArray(parquetType, ""); + } else { + fieldSchema = convertField(parquetType); + } + + colNames.add(parquetType.getName()); + colTypes.append(fieldSchema + ":"); + + hiveSchema.append(parquetType.getName() + " " + fieldSchema + ", "); + } + + return hiveSchema.substring(0, hiveSchema.length() - 1); + } + + private String convertField(final Type parquetType) { + if (parquetType.isPrimitive()) { + final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = + parquetType.asPrimitiveType().getPrimitiveTypeName(); + final OriginalType originalType = parquetType.getOriginalType(); + return parquetPrimitiveTypeName.convert( + new PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() { + @Override + public String convertBOOLEAN(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "boolean"; + } + + @Override + public String convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "int"; + } + + @Override + public String convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "bigint"; + } + + @Override + public String convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + throw new IllegalArgumentException("INT96 not yet implemented."); + } + + @Override + public String convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "float"; + } + + @Override + public String convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "double"; + } + + @Override + public String convertFIXED_LEN_BYTE_ARRAY(PrimitiveType.PrimitiveTypeName + primitiveTypeName) { + return "binary"; + } + + @Override + public String convertBINARY(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) { + if (originalType == OriginalType.UTF8 || originalType == OriginalType.ENUM) { + return "string"; + } else if (originalType == OriginalType.DECIMAL) { + final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType() + .getDecimalMetadata(); + return "decimal(" + decimalMetadata.getPrecision() + "," + + decimalMetadata.getScale() + ")"; + } else { + return "binary"; + } + } + }); + } else { + GroupType parquetGroupType = parquetType.asGroupType(); + OriginalType originalType = parquetGroupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case LIST: + if (parquetGroupType.getFieldCount() != 1) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + Type elementType = parquetGroupType.getType(0); + if (!elementType.isRepetition(Type.Repetition.REPEATED)) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + return createHiveArray(elementType, parquetGroupType.getName()); + case MAP: + if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); + if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || + !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || + mapKeyValType.getFieldCount() != 2) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + Type keyType = mapKeyValType.getType(0); + if (!keyType.isPrimitive() || + !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType + .PrimitiveTypeName.BINARY) || + !keyType.getOriginalType().equals(OriginalType.UTF8)) { + throw new IllegalArgumentException("Map key type must be binary (UTF8): " + + keyType); + } + Type valueType = mapKeyValType.getType(1); + return createHiveMap(convertField(keyType), convertField(valueType)); + case 
ENUM: + return "string"; + case MAP_KEY_VALUE: + case UTF8: + default: + throw new UnsupportedOperationException("Cannot convert Parquet type " + + parquetType); + } + } else { + // if no original type then it's a record + return createHiveStruct(parquetGroupType.getFields()); + } + } + + private String createHiveStruct(List<Type> parquetFields) { + StringBuilder structString = new StringBuilder(); + + structString.append("struct<"); + boolean needComma = false; + for (Type field: parquetFields) { + if (needComma) { + structString.append(","); + } else { + needComma = true; + } + structString.append(field.getName()+":"+convertField(field)); + } + structString.append(">"); + + return structString.toString(); + } + + private String createHiveMap(String keyType, String valueType) { + return "map<" + keyType + "," + valueType + ">"; + } + + private String createHiveArray(Type elementType, String elementName) { + if (elementType.isPrimitive()) { + return "array<" + convertField(elementType) + ">"; + } else { + final GroupType groupType = elementType.asGroupType(); + final List<Type> groupFields = groupType.getFields(); + if (groupFields.size() > 1 || + (groupFields.size() == 1 && + (elementType.getName().equals("array") || + elementType.getName().equals(elementName + "_tuple")))) { + return "array<" + createHiveStruct(groupFields) + ">"; + } else { + return "array<" + convertField(groupType.getFields().get(0)) + ">"; + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index 4effe73..34be855 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -13,16 +13,10 @@ */ package org.apache.hadoop.hive.ql.io.parquet.serde; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; 
-import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.io.parquet.convert.ColInfoFromParquetFile; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; @@ -49,8 +43,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -67,6 +61,14 @@ import parquet.hadoop.ParquetWriter; import parquet.io.api.Binary; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + /** * * A ParquetHiveSerDe for Hive (with the deprecated package mapred) @@ -94,6 +96,8 @@ } } + private final String PARQUET_FILE = "parquet.file"; + private SerDeStats stats; private ObjectInspector objInspector; @@ -121,21 +125,34 @@ public final void initialize(final Configuration conf, final Properties tbl) thr // Get compression properties compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION); - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList<String>(); - } else { - columnNames = 
Arrays.asList(columnNameProperty.split(",")); - } - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList<TypeInfo>(); + if (columnNameProperty.length() == 0 && columnTypeProperty.length() == 0) { + final String parquetFile = tbl.getProperty(PARQUET_FILE, null); + if (parquetFile == null) { + throw new RuntimeException("Either provide schema for table or point to parquet file"); + } + + Pair<List<String>, String> colNameAndTypes = new ColInfoFromParquetFile().convert + (parquetFile); + columnNames = colNameAndTypes.getLeft(); + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(colNameAndTypes.getRight()); } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList<String>(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList<TypeInfo>(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. 
columnNames = " + columnNames + ", columnTypes = " + columnTypes); } + // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); diff --git ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q new file mode 100644 index 0000000..affcc5f --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema; + +SELECT * FROM parquet_array_of_multi_field_structs_gen_schema; + +DROP TABLE parquet_array_of_multi_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q new file mode 100644 index 0000000..73e93e7 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
+OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema; + +SELECT * FROM parquet_array_of_optional_elements_gen_schema; + +DROP TABLE parquet_array_of_optional_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q new file mode 100644 index 0000000..b3c26d3 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema; + +SELECT * FROM parquet_array_of_required_elements_gen_schema; + +DROP TABLE parquet_array_of_required_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..7612dc9 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,19 @@ +-- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of 
bigints +-- This is verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q new file mode 100644 index 0000000..05c9f63 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema; + +SELECT * FROM parquet_array_of_structs_gen_schema; + +DROP TABLE parquet_array_of_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q new file mode 100644 index 
0000000..3f356c4 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema; + +SELECT * FROM parquet_array_of_unannotated_groups_gen_schema; + +DROP TABLE parquet_array_of_unannotated_groups_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q new file mode 100644 index 0000000..0549808 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema; + +SELECT * FROM parquet_array_of_unannotated_ints_gen_schema; 
+ +DROP TABLE parquet_array_of_unannotated_ints_gen_schema; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..51eb6fd --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q @@ -0,0 +1,15 @@ +CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema; + +SELECT * FROM parquet_avro_array_of_primitives_gen_schema; + +DROP TABLE parquet_avro_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..51dba64 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH 
'../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q new file mode 100644 index 0000000..7d39d36 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q @@ -0,0 +1,16 @@ +CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq'); + +DESCRIBE FORMATTED parquet_decimal_gen_schema; + +LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema; + +SELECT * FROM parquet_decimal_gen_schema; + +DROP TABLE parquet_decimal_gen_schema; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..0fcc356 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet'); + +LOAD DATA LOCAL INPATH 
'../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema; + +SELECT * FROM parquet_thrift_array_of_primitives_gen_schema; + +DROP TABLE parquet_thrift_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..1646118 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema; diff --git ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out new file mode 100644 index 0000000..ffe77ff --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out new file mode 100644 index 0000000..69e920f --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},null,{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out 
ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out new file mode 100644 index 0000000..6129468 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: 
Output: default@parquet_array_of_required_elements_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..e5adf8a --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,57 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This is verifies compliance with the spec for this case. 
+ +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This is verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: query: 
SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[1234,2345] +PREHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out new file mode 100644 index 0000000..d502ee1 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table 
with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema diff --git 
ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out new file mode 100644 index 0000000..b7faa1d --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: LOAD DATA 
LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +[{"x":1.0,"y":1.0},{"x":2.0,"y":2.0}] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out new file mode 100644 index 0000000..474a0ca --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE 
parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out new file mode 100644 index 0000000..dc7be20 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out @@ -0,0 +1,49 @@ +PREHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE 
+parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..5aad90e --- /dev/null +++ ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: 
SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out new file mode 100644 index 0000000..e3ab207 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@parquet_decimal_gen_schema +PREHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +POSTHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@parquet_decimal_gen_schema +# col_name data_type comment + +name string +value decimal(5,2) + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + parquet.file ../../data/files/dec.parq +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_decimal_gen_schema +PREHOOK: query: SELECT * FROM parquet_decimal_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_decimal_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_decimal_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_decimal_gen_schema +#### A masked pattern was here #### +Tom 234.79 +Beck 77.34 +Snow 55.71 +Mary 4.33 +Cluck 5.96 +Tom 12.25 +Mary 33.33 +Tom 0.19 +Beck 3.15 +Beck 7.99 +PREHOOK: query: DROP TABLE 
parquet_decimal_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: DROP TABLE parquet_decimal_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_decimal_gen_schema +POSTHOOK: Output: default@parquet_decimal_gen_schema diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out new file mode 100644 index 0000000..79c9e05 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..ec476bb --- /dev/null +++ ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE 
parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: 
default@parquet_thrift_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema