diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index fafd78e..6123c11 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -759,7 +759,7 @@ "org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", "SerDes retrieving schema from metastore. This is an internal parameter. Check with the hive dev. team"), HIVEHISTORYFILELOC("hive.querylog.location", diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java index 3218639..250bf16 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java @@ -169,11 +169,8 @@ public void commitDropTable( public void preCreateTable(Table tbl) throws MetaException { boolean isExternal = MetaStoreUtils.isExternalTable(tbl); - // We'd like to move this to HiveMetaStore for any non-native table, but - // first we need to support storing NULL for location on a table - if (tbl.getSd().getLocation() != null) { - throw new MetaException("LOCATION may not be specified for HBase."); - } + // HIVE-8950 removed the assertion that the location in the table's storage descriptor must + // be null. The change allows SerDes that need the table location, such as ParquetHiveSerDe, + // to receive it. HTable htable = null; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index b137fcb..305a8e6 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -2289,6 +2289,12 @@ private boolean createLocationForAddedPartition( boolean result = false; if (partLocation != null) { + if (part.getSd() == null) { + // If the partition does not have a storage descriptor, get one from the table. + // Using deepCopy, as different partitions of a table + // can have different storage descriptors. + part.setSd(tbl.getSd().deepCopy()); + } part.getSd().setLocation(partLocation.toString()); // Check to see if the directory already exists before calling diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 2db2658..3043771 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -366,11 +366,17 @@ static public Deserializer getDeserializer(Configuration conf, try { Deserializer deserializer = ReflectionUtils.newInstance(conf.getClassByName(lib).
asSubclass(Deserializer.class), conf); + + final Properties tableMetadata = MetaStoreUtils.getTableMetadata(table); + if (tableMetadata.getProperty("location", null) == null && + table.getSd().getLocation() != null) { + tableMetadata.setProperty("location", table.getSd().getLocation()); + } if (skipConfError) { SerDeUtils.initializeSerDeWithoutErrorCheck(deserializer, conf, - MetaStoreUtils.getTableMetadata(table), null); + tableMetadata, null); } else { - SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null); + SerDeUtils.initializeSerDe(deserializer, conf, tableMetadata, null); } return deserializer; } catch (RuntimeException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java new file mode 100644 index 0000000..05fea0f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java @@ -0,0 +1,36 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import parquet.hadoop.ParquetFileReader; +import parquet.hadoop.metadata.ParquetMetadata; +import parquet.schema.GroupType; + +import java.io.IOException; + +public class ParquetSchemaReader { + public static GroupType read(Path parquetFile) throws IOException { + + Configuration conf = new Configuration(); + ParquetMetadata metaData; + try { + metaData = ParquetFileReader.readFooter(conf, parquetFile); + } catch (IOException e) { + throw new IOException("Error reading footer from: " + parquetFile, e); + } + return metaData.getFileMetaData().getSchema(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java new file mode 100644 index 0000000..c0b536b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java @@ -0,0 +1,227 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import parquet.schema.DecimalMetadata; +import parquet.schema.GroupType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Type; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +public class ParquetToHiveSchemaConverter { + + private static final String INT96_IS_TS_PROPERTY_KEY = "parquet.int96.is.timestamp"; + + private Properties properties = null; + + public ParquetToHiveSchemaConverter() {} + public ParquetToHiveSchemaConverter(Properties props) { + properties = props; + } + + private static final Log LOG = LogFactory.getLog(ParquetToHiveSchemaConverter.class); + + public StructTypeInfo convert(GroupType parquetSchema) { + return convertFields(parquetSchema.getFields()); + } + + private StructTypeInfo convertFields(List parquetFields) { + StructTypeInfo structTypeInfo = new StructTypeInfo(); + ArrayList names = new ArrayList(); + ArrayList types = new ArrayList(); + + for (Type parquetType : parquetFields) { + + TypeInfo type; + if (parquetType.isRepetition(Type.Repetition.REPEATED)) { + type = createHiveArray(parquetType, ""); + } else { + type = convertField(parquetType); + } + + names.add(parquetType.getName()); + types.add(type); + } + + structTypeInfo.setAllStructFieldNames(names); + structTypeInfo.setAllStructFieldTypeInfos(types); + + LOG.info("Hive StructTypeInfo generated from the Parquet schema: " + structTypeInfo); + + return structTypeInfo; + } + + private TypeInfo convertField(final Type parquetType) { + if (parquetType.isPrimitive()) { + final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = + parquetType.asPrimitiveType().getPrimitiveTypeName(); + final OriginalType originalType = parquetType.getOriginalType(); + + if (originalType == OriginalType.DECIMAL) { + final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType() + .getDecimalMetadata(); + return TypeInfoFactory.getDecimalTypeInfo(decimalMetadata.getPrecision(), + decimalMetadata.getScale()); + } + + if (parquetPrimitiveTypeName.equals(PrimitiveType.PrimitiveTypeName.INT96)) { + if (properties == null || !properties.containsKey(INT96_IS_TS_PROPERTY_KEY)) { + throw new UnsupportedOperationException("Parquet's INT96 does not have a valid mapping" + + " to a Hive type.\nIf you want Parquet's INT96 to be mapped to Hive's timestamp," + + " then set '" + INT96_IS_TS_PROPERTY_KEY + "' in the table properties.
Otherwise, " + "provide the Hive schema explicitly in the DDL statement"); + } + } + + return parquetPrimitiveTypeName.convert( + new PrimitiveType.PrimitiveTypeNameConverter() { + @Override + public TypeInfo convertBOOLEAN(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.booleanTypeInfo; + } + + @Override + public TypeInfo convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.intTypeInfo; + } + + @Override + public TypeInfo convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.longTypeInfo; + } + + @Override + public TypeInfo convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.timestampTypeInfo; + } + + @Override + public TypeInfo convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.floatTypeInfo; + } + + @Override + public TypeInfo convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.doubleTypeInfo; + } + + @Override + public TypeInfo convertFIXED_LEN_BYTE_ARRAY(PrimitiveType.PrimitiveTypeName + primitiveTypeName) { + return TypeInfoFactory.binaryTypeInfo; + } + + @Override + public TypeInfo convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + if (originalType == OriginalType.UTF8 || originalType == OriginalType.ENUM) { + return TypeInfoFactory.stringTypeInfo; + } else { + return TypeInfoFactory.binaryTypeInfo; + } + } + }); + } else { + GroupType parquetGroupType = parquetType.asGroupType(); + OriginalType originalType = parquetGroupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case LIST: + if (parquetGroupType.getFieldCount() != 1) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + Type elementType = parquetGroupType.getType(0); + if (!elementType.isRepetition(Type.Repetition.REPEATED)) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + return createHiveArray(elementType, parquetGroupType.getName()); + case MAP: + if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); + if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || + !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || + mapKeyValType.getFieldCount() != 2) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + Type keyType = mapKeyValType.getType(0); + if (!keyType.isPrimitive() || + !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType + .PrimitiveTypeName.BINARY) || + !keyType.getOriginalType().equals(OriginalType.UTF8)) { + throw new UnsupportedOperationException("Map key type must be binary (UTF8): " + + keyType); + } + Type valueType = mapKeyValType.getType(1); + return createHiveMap(convertField(keyType), convertField(valueType)); + case ENUM: + case UTF8: + return TypeInfoFactory.stringTypeInfo; + case MAP_KEY_VALUE: + // MAP_KEY_VALUE was supposed to be used to annotate key and value group levels in a + // MAP. However, that is always implied by the structure of MAP. Hence, PARQUET-113 + // dropped the requirement for having MAP_KEY_VALUE.
+ default: + throw new UnsupportedOperationException("Cannot convert Parquet type " + + parquetType); + } + } else { + // if no original type then it's a record + return createHiveStruct(parquetGroupType.getFields()); + } + } + } + + private TypeInfo createHiveStruct(List parquetFields) { + List names = new ArrayList(); + List typeInfos = new ArrayList(); + + for (Type field: parquetFields) { + names.add(field.getName()); + typeInfos.add(convertField(field)); + } + + return TypeInfoFactory.getStructTypeInfo(names, typeInfos); + } + + private TypeInfo createHiveMap(TypeInfo keyType, TypeInfo valueType) { + return TypeInfoFactory.getMapTypeInfo(keyType, valueType); + } + + private TypeInfo createHiveArray(Type elementType, String elementName) { + if (elementType.isPrimitive()) { + return TypeInfoFactory.getListTypeInfo(convertField(elementType)); + } else { + final GroupType groupType = elementType.asGroupType(); + final List groupFields = groupType.getFields(); + if (groupFields.size() > 1 || + (groupFields.size() == 1 && + (elementType.getName().equals("array") || + elementType.getName().equals(elementName + "_tuple")))) { + return TypeInfoFactory.getListTypeInfo(createHiveStruct(groupFields)); + } else { + return TypeInfoFactory.getListTypeInfo(convertField(groupType.getFields().get(0))); + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index 4effe73..b414f5a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -13,7 +13,9 @@ */ package org.apache.hadoop.hive.ql.io.parquet.serde; +import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -21,8 +23,15 @@ import java.util.Map.Entry; import java.util.Properties; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetSchemaReader; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; @@ -75,6 +84,8 @@ @SerDeSpec(schemaProps = {serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, ParquetOutputFormat.COMPRESSION}) public class ParquetHiveSerDe extends AbstractSerDe { + private static final Log LOG = LogFactory.getLog(ParquetHiveSerDe.class); + public static final Text MAP_KEY = new Text("key"); public static final Text MAP_VALUE = new Text("value"); public static final Text MAP = new Text("map"); @@ -94,6 +105,8 @@ } } + private static final String PARQUET_FILE = "parquet.file"; + private SerDeStats stats; private ObjectInspector objInspector; @@ -112,8 +125,8 @@ public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { final TypeInfo rowTypeInfo; - final List columnNames; - final List columnTypes; + List columnNames; + List columnTypes; // Get column names and sort order final String columnNameProperty = 
tbl.getProperty(serdeConstants.LIST_COLUMNS); final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); @@ -121,20 +134,67 @@ public final void initialize(final Configuration conf, final Properties tbl) thr // Get compression properties compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION); - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); + if (columnNameProperty.length() == 0 && columnTypeProperty.length() == 0) { + final String parquetFileProperty = tbl.getProperty(PARQUET_FILE, null); + Path parquetFile = parquetFileProperty != null ? getParquetFile(conf, + new Path(parquetFileProperty)) : null; + + if (parquetFile == null) { + final String locationProperty = tbl.getProperty("location", null); + parquetFile = locationProperty != null ? getParquetFile(conf, + new Path(locationProperty)) : null; + } + + if (parquetFile == null) { + /** + * Determining the Hive schema failed, but we cannot throw + * an exception, as Hive calls init on the SerDe during + * every call, including calls that update the SerDe properties; + * if the SerDe were in a bad state, there would be no way to update that state. + */ + LOG.error("Failed to create a Hive schema for the Parquet-backed table.\n" + + "Either provide a schema for the table,\n" + + "OR point to a Parquet file using " + PARQUET_FILE + " in tblproperties,\n" + + "OR make sure that the table has at least one Parquet file with the required metadata"); + columnNames = new ArrayList(); + columnTypes = new ArrayList(); + } else { + StructTypeInfo structTypeInfo = null; + try { + structTypeInfo = new ParquetToHiveSchemaConverter(tbl).convert( + ParquetSchemaReader.read(parquetFile)); + } catch (IOException ioe) { + LOG.error(ioe.getMessage(), ioe); + } catch (UnsupportedOperationException ue) { + LOG.error(ue.getMessage(), ue); + } catch (RuntimeException ex) { + LOG.error(ex.getMessage(), ex); + } + if (structTypeInfo == null) { + columnNames = new ArrayList(); + columnTypes = new ArrayList(); + } else { + columnNames = structTypeInfo.getAllStructFieldNames(); + columnTypes = structTypeInfo.getAllStructFieldTypeInfos(); + } + } } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } } if (columnNames.size() != columnTypes.size()) { - throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + - "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + - columnTypes); + LOG.error("ParquetHiveSerde initialization failed. The number of column names does not " + + "match the number of column types.
columnNames = " + columnNames + ", columnTypes = " + columnTypes); + columnNames = new ArrayList(); + columnTypes = new ArrayList(); } // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); @@ -330,4 +390,42 @@ public SerDeStats getSerDeStats() { } return stats; } + + private Path getParquetFile(Configuration conf, Path loc) { + if (loc == null) { + return null; + } + + Path parquetFile; + try { + parquetFile = getAFile(FileSystem.get(new URI(loc.toString()), conf), loc); + } catch (Exception e) { + LOG.error("Unable to read file from " + loc + ": " + e, e); + parquetFile = null; + } + + return parquetFile; + } + + private Path getAFile(FileSystem fs, Path path) throws IOException { + FileStatus status = fs.getFileStatus(path); + + if (status.isFile()) { + if (status.getLen() > 0) { + return path; + } else { + return null; + } + } + + for (FileStatus childStatus : fs.listStatus(path)) { + Path file = getAFile(fs, childStatus.getPath()); + + if (file != null) { + return file; + } + } + + return null; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index cd3d349..590c9a6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -653,6 +653,7 @@ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { if (tbl.getDbName() == null || "".equals(tbl.getDbName().trim())) { tbl.setDbName(SessionState.get().getCurrentDatabase()); } + setTableLocInTableProperties(tbl); if (tbl.getCols().size() == 0) { tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(), tbl.getDeserializer())); @@ -683,6 +684,30 @@ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { } } + + private void setTableLocInTableProperties(Table tbl) throws TException { + tbl.getTTable().getSd().setLocation(getTablePath(tbl)); + } + + private String getTablePath(Table table) throws TException { + Warehouse wh = new Warehouse(conf); + Path tablePath; + + final String location = table.getSd().getLocation(); + if (location == null || location.isEmpty()) { + tablePath = wh.getTablePath( + getMSC().getDatabase(table.getDbName()), table.getTableName()); + } else { + tablePath = wh.getDnsPath(new Path(location)); + } + + if (tablePath != null) { + LOG.info("Table path is: " + tablePath); + return tablePath.toString(); + } else { + return null; + } + } + /** * * @param tableName diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java new file mode 100644 index 0000000..53577fa --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java @@ -0,0 +1,411 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import parquet.schema.GroupType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Type; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +public class TestParquetToHiveSchemaConverter { + + static ParquetToHiveSchemaConverter parquetToHiveSchemaConverter; + + @BeforeClass + public static void setUp() { + parquetToHiveSchemaConverter = new ParquetToHiveSchemaConverter(); + } + + @Test + public void testUtf8() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, + "utf8", OriginalType.UTF8)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("utf8"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.stringTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testEnum() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, + "enum", OriginalType.ENUM)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("enum"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.stringTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testBinary() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, + "binary", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("binary"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.binaryTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testBoolean() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BOOLEAN, + "boolean", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("boolean"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.booleanTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testDouble() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.DOUBLE, + "double", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("double"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.doubleTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testFixedLenByteArray() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, + "fixedLenByteArray", null)); + GroupType groupType = new 
GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("fixedLenByteArray"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.binaryTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testFloat() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.FLOAT, + "float", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("float"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.floatTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt32() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, + "int32", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("int32"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.intTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt64() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT64, + "int64", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("int64"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.longTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt96() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT96, + "int96", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("int96"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.timestampTypeInfo); + + Properties props = new Properties(); + props.setProperty("parquet.int96.is.timestamp", ""); + + test(groupType, names, typeInfos, new ParquetToHiveSchemaConverter(props)); + } + + @Test + public void testInt96Negative() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT96, + "int96", null)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("int96"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.timestampTypeInfo); + + try { + test(groupType, names, typeInfos); + Assert.fail("Failed to throw UnsupportedOperationException for INT96"); + } catch (UnsupportedOperationException use) { + // It's good! 
+ } + } + + @Test + public void primitiveOptional() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.INT32, + "int32", null)); + GroupType groupType = new GroupType(Type.Repetition.OPTIONAL, "top", types); + + ArrayList names = new ArrayList(); + names.add("int32"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.intTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void listOfPrimitives() { + List types = new ArrayList(); + types.add(new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.INT32, + "intlist", OriginalType.LIST)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("intlist"); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo)); + + test(groupType, names, typeInfos); + } + + @Test + public void listOfStruct() { + List types = new ArrayList(); + List struct = new ArrayList(); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, + "string", OriginalType.UTF8)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.FLOAT, + "float", null)); + types.add(new GroupType(Type.Repetition.REPEATED, "structlist", struct)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("structlist"); + final List structTypeInfos = new ArrayList(); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.floatTypeInfo); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getStructTypeInfo(Arrays.asList + ("string", "float"), structTypeInfos))); + + test(groupType, names, typeInfos); + } + + @Test + public void structOfPrimitives() { + List types = new ArrayList(); + List struct = new ArrayList(); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, + "utf8", OriginalType.UTF8)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, + "enum", OriginalType.ENUM)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, + "binary", null)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BOOLEAN, + "boolean", null)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.DOUBLE, + "double", null)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, + "fixedLenByteArray", null)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.FLOAT, + "float", null)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, + "int32", null)); + struct.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT64, + "int64", null)); + types.add(new GroupType(Type.Repetition.REQUIRED, "struct", struct)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", types); + + ArrayList names = new ArrayList(); + names.add("struct"); + final List structTypeInfos = new ArrayList(); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + 
structTypeInfos.add(TypeInfoFactory.binaryTypeInfo); + structTypeInfos.add(TypeInfoFactory.booleanTypeInfo); + structTypeInfos.add(TypeInfoFactory.doubleTypeInfo); + structTypeInfos.add(TypeInfoFactory.binaryTypeInfo); + structTypeInfos.add(TypeInfoFactory.floatTypeInfo); + structTypeInfos.add(TypeInfoFactory.intTypeInfo); + structTypeInfos.add(TypeInfoFactory.longTypeInfo); + ArrayList typeInfos = new ArrayList(); + typeInfos.add(TypeInfoFactory.getStructTypeInfo(Arrays.asList + ("utf8", "enum", "binary", "boolean", "double", "fixedLenByteArray", "float", + "int32", "int64"), + structTypeInfos)); + + test(groupType, names, typeInfos); + } + + @Test + public void nestedStruct() { + List innerStructFields = new ArrayList(); + innerStructFields.add(new PrimitiveType(Type.Repetition.REQUIRED, + PrimitiveType.PrimitiveTypeName.INT32, "int", null)); + innerStructFields.add(new PrimitiveType(Type.Repetition.REQUIRED, + PrimitiveType.PrimitiveTypeName.BINARY, "string", OriginalType.UTF8)); + + List outerStructFields = new ArrayList(); + outerStructFields.add(new GroupType(Type.Repetition.REQUIRED, "innerstruct", + innerStructFields)); + + List fields = new ArrayList(); + fields.add(new GroupType(Type.Repetition.REQUIRED, "outerstruct", outerStructFields)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", fields); + + final ArrayList typeInfos = new ArrayList(Arrays.asList( + TypeInfoFactory.getStructTypeInfo(Arrays.asList("innerstruct"), Arrays.asList( + TypeInfoFactory.getStructTypeInfo(new ArrayList(Arrays.asList("int", "string")), + new ArrayList(Arrays.asList( + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo))) + )) + )); + + test(groupType, new ArrayList(Arrays.asList("outerstruct")), typeInfos); + } + + @Test + public void map() { + List fields = new ArrayList(); + List mapFields = new ArrayList(); + List keyValFields = new ArrayList(); + keyValFields.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName + .BINARY, "key", OriginalType.UTF8)); + keyValFields.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName + .DOUBLE, "value", null)); + mapFields.add(new GroupType(Type.Repetition.REPEATED, "keyval", OriginalType.MAP_KEY_VALUE, + keyValFields)); + fields.add(new GroupType(Type.Repetition.REQUIRED, "map", + OriginalType.MAP, mapFields)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", fields); + + final ArrayList typeInfos = new ArrayList(Arrays.asList( + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.doubleTypeInfo) + )); + + test(groupType, new ArrayList(Arrays.asList("map")), typeInfos); + } + + @Test + public void nestedMap() { + List fields = new ArrayList(); + List mapFields = new ArrayList(); + List keyValFields = new ArrayList(); + keyValFields.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName + .BINARY, "key", OriginalType.UTF8)); + + List innerMapFields = new ArrayList(); + List innerKeyValFields = new ArrayList(); + innerKeyValFields.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName + .BINARY, "key", OriginalType.UTF8)); + innerKeyValFields.add(new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName + .INT64, "value", null)); + innerMapFields.add(new GroupType(Type.Repetition.REPEATED, "innerkeyval", + OriginalType.MAP_KEY_VALUE, innerKeyValFields)); + keyValFields.add(new GroupType(Type.Repetition.REQUIRED, "innermap", + OriginalType.MAP,
innerMapFields)); + mapFields.add(new GroupType(Type.Repetition.REPEATED, "keyval", OriginalType.MAP_KEY_VALUE, + keyValFields)); + fields.add(new GroupType(Type.Repetition.REQUIRED, "map", + OriginalType.MAP, mapFields)); + GroupType groupType = new GroupType(Type.Repetition.REQUIRED, "top", fields); + + final ArrayList typeInfos = new ArrayList(Arrays.asList( + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.longTypeInfo)) + )); + + test(groupType, new ArrayList(Arrays.asList("map")), typeInfos); + + } + + private void test(GroupType groupType, ArrayList names, ArrayList typeInfos) { + test(groupType, names, typeInfos, parquetToHiveSchemaConverter); + } + + private void test(GroupType groupType, ArrayList names, ArrayList typeInfos, + ParquetToHiveSchemaConverter converter) { + StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldNames(names); + structTypeInfo.setAllStructFieldTypeInfos(typeInfos); + + final StructTypeInfo actualTypeInfo = converter.convert(groupType); + Assert.assertEquals(structTypeInfo, actualTypeInfo); + } +} diff --git ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q new file mode 100644 index 0000000..affcc5f --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema; + +SELECT * FROM parquet_array_of_multi_field_structs_gen_schema; + +DROP TABLE parquet_array_of_multi_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q new file mode 100644 index 0000000..73e93e7 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema; + +SELECT * FROM parquet_array_of_optional_elements_gen_schema; + +DROP TABLE parquet_array_of_optional_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q new file mode 100644 index 
0000000..b3c26d3 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema; + +SELECT * FROM parquet_array_of_required_elements_gen_schema; + +DROP TABLE parquet_array_of_required_elements_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..7612dc9 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,19 @@ +-- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q new file mode 100644 index 0000000..05c9f63 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema; + +SELECT * FROM parquet_array_of_structs_gen_schema; + +DROP TABLE parquet_array_of_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q new file mode 100644 index 0000000..17a6e22 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir}
${system:test.tmp.dir}/parquet_array_of_structs_gen_schema_ext_tmp; +dfs -cp ${system:hive.root}/data/files/HiveRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_structs_gen_schema_ext_tmp; + +CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_structs_gen_schema_ext_tmp'; + +SELECT * FROM parquet_array_of_structs_gen_schema_ext; + +DROP TABLE parquet_array_of_structs_gen_schema_ext; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q new file mode 100644 index 0000000..3f356c4 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema; + +SELECT * FROM parquet_array_of_unannotated_groups_gen_schema; + +DROP TABLE parquet_array_of_unannotated_groups_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q new file mode 100644 index 0000000..0549808 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema; + +SELECT * FROM parquet_array_of_unannotated_ints_gen_schema; + +DROP TABLE parquet_array_of_unannotated_ints_gen_schema; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..51eb6fd --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q @@ -0,0 +1,15 @@ +CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema; + +SELECT * FROM parquet_avro_array_of_primitives_gen_schema; + +DROP TABLE parquet_avro_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..51dba64 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q new file mode 100644 index 0000000..7d39d36 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q @@ -0,0 +1,16 @@ +CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq'); + +DESCRIBE FORMATTED parquet_decimal_gen_schema; + +LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema; + +SELECT * FROM parquet_decimal_gen_schema; + +DROP TABLE parquet_decimal_gen_schema; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q new file mode 100644 index 0000000..0fcc356 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q @@ -0,0 +1,17 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema; + +SELECT * FROM parquet_thrift_array_of_primitives_gen_schema; + +DROP TABLE 
parquet_thrift_array_of_primitives_gen_schema; diff --git ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000..1646118 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,18 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet'); + +LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema; + +SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema; diff --git ql/src/test/results/clientpositive/create_view_partitioned.q.out ql/src/test/results/clientpositive/create_view_partitioned.q.out index ebf9a6b..e1bf0ff 100644 --- ql/src/test/results/clientpositive/create_view_partitioned.q.out +++ ql/src/test/results/clientpositive/create_view_partitioned.q.out @@ -175,12 +175,16 @@ POSTHOOK: query: SHOW TABLE EXTENDED LIKE vp1 POSTHOOK: type: SHOW_TABLESTATUS tableName:vp1 #### A masked pattern was here #### -location:null inputformat:org.apache.hadoop.mapred.SequenceFileInputFormat outputformat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat columns:struct columns { string key} partitioned:true partitionColumns:struct partition_columns { string value} +totalNumberFiles:0 +totalFileSize:0 +maxFileSize:0 +minFileSize:0 +#### A masked pattern was here #### PREHOOK: query: SHOW TABLE EXTENDED LIKE vp1 PARTITION(value='val_86') PREHOOK: type: SHOW_TABLESTATUS @@ -188,12 +192,16 @@ POSTHOOK: query: SHOW TABLE EXTENDED LIKE vp1 PARTITION(value='val_86') POSTHOOK: type: SHOW_TABLESTATUS tableName:vp1 #### A masked pattern was here #### -location:null inputformat:org.apache.hadoop.mapred.SequenceFileInputFormat outputformat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat columns:struct columns { string key} partitioned:true partitionColumns:struct partition_columns { string value} +totalNumberFiles:0 +totalFileSize:0 +maxFileSize:0 +minFileSize:0 +#### A masked pattern was here #### PREHOOK: query: ALTER VIEW vp1 DROP PARTITION (value='val_xyz') diff --git ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out new file mode 100644 index 0000000..ffe77ff --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES 
('parquet.file'='../../data/files/MultiFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/MultiFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out new file mode 100644 index 0000000..69e920f --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE 
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewOptionalGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},null,{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema +POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out new file mode 100644 index 0000000..6129468 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/NewRequiredGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@parquet_array_of_required_elements_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..e5adf8a --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,57 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This verifies compliance with the spec for this case.
+ +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/SingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[1234,2345] +PREHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out new file mode 100644 index 0000000..d502ee1 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES 
('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out new file mode 100644 index 0000000..30acdae --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE 
parquet_array_of_structs_gen_schema_ext +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema_ext +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext diff --git ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out new file mode 100644 index 0000000..b7faa1d --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfGroups.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +[{"x":1.0,"y":1.0},{"x":2.0,"y":2.0}] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema diff --git ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out new file mode 100644 index 0000000..474a0ca --- /dev/null +++ ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/UnannotatedListOfPrimitives.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out new file mode 100644 index 0000000..dc7be20 --- /dev/null +++ 
ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out @@ -0,0 +1,49 @@ +PREHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroPrimitiveInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' OVERWRITE INTO TABLE +parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema diff --git ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..5aad90e --- /dev/null +++ ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet') 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/AvroSingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema diff --git ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out new file mode 100644 index 0000000..e3ab207 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' 
+TBLPROPERTIES ('parquet.file'='../../data/files/dec.parq') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_decimal_gen_schema +PREHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +POSTHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@parquet_decimal_gen_schema +# col_name data_type comment + +name string +value decimal(5,2) + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + parquet.file ../../data/files/dec.parq +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.parq' OVERWRITE INTO TABLE parquet_decimal_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_decimal_gen_schema +PREHOOK: query: SELECT * FROM parquet_decimal_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_decimal_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_decimal_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_decimal_gen_schema +#### A masked pattern was here #### +Tom 234.79 +Beck 77.34 +Snow 55.71 +Mary 4.33 +Cluck 5.96 +Tom 12.25 +Mary 33.33 +Tom 0.19 +Beck 3.15 +Beck 7.99 +PREHOOK: query: DROP TABLE parquet_decimal_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: DROP TABLE parquet_decimal_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_decimal_gen_schema +POSTHOOK: Output: default@parquet_decimal_gen_schema diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out new file mode 100644 index 0000000..79c9e05 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of primitives + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of primitives + +CREATE TABLE
parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftPrimitiveInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema +POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema diff --git ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000..ec476bb --- /dev/null +++ ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES ('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +TBLPROPERTIES 
('parquet.file'='../../data/files/ThriftSingleFieldGroupInList.parquet') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
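
Every golden file added above exercises the same four-step pattern: CREATE TABLE with no column list, LOAD DATA, SELECT, DROP TABLE. For orientation, the sketch below reassembles the underlying .q script from the PREHOOK/POSTHOOK query echoes in parquet_array_of_structs_gen_schema.q.out; the statement terminators and comments are additions here, so it is illustrative rather than a verbatim copy of any test script in the patch.

-- No column list is given: with the 'parquet.file' table property set, the
-- schema-generation path exercised by these tests derives the Hive columns
-- from the footer of the named Parquet file.
CREATE TABLE parquet_array_of_structs_gen_schema
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
TBLPROPERTIES ('parquet.file'='../../data/files/HiveRequiredGroupInList.parquet');

-- Load the same file, then read it back through the generated schema.
LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet'
OVERWRITE INTO TABLE parquet_array_of_structs_gen_schema;

SELECT * FROM parquet_array_of_structs_gen_schema;
-- expected output: [{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}]

DROP TABLE parquet_array_of_structs_gen_schema;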