diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 679962c65c..40d1644c80 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1963,7 +1963,6 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," + "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe," + "org.apache.hadoop.hive.serde2.OpenCSVSerde", "SerDes retrieving schema from metastore. This is an internal parameter."), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java new file mode 100644 index 0000000000..1f7a6c6d42 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetSchemaReader.java @@ -0,0 +1,36 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.apache.parquet.schema.GroupType; + +import java.io.IOException; + +public class ParquetSchemaReader { + public static GroupType read(Path parquetFile) throws IOException { + Configuration conf = new Configuration(); + ParquetMetadata metaData; + try { + metaData = ParquetFileReader.readFooter(conf, parquetFile); + } catch (IOException e) { + throw new IOException("Error reading footer from: " + parquetFile, e); + } + return metaData.getFileMetaData().getSchema(); + } +}
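A minimal usage sketch of the new footer reader (not part of the patch); the file path is a placeholder and any Parquet file with a readable footer would do:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetSchemaReader;
import org.apache.parquet.schema.GroupType;

public class ParquetSchemaReaderDemo {
  public static void main(String[] args) throws Exception {
    // Placeholder path; read() loads the file footer and returns the
    // Parquet schema, e.g. "message hive_schema { optional int32 id; ... }".
    GroupType schema = ParquetSchemaReader.read(new Path("/tmp/demo.parquet"));
    System.out.println(schema);
  }
}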
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java new file mode 100644 index 0000000000..d495305280 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ParquetToHiveSchemaConverter.java @@ -0,0 +1,221 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.parquet.schema.*; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +public class ParquetToHiveSchemaConverter { + + private static final String INT96_IS_TS_PROPERTY_KEY = "parquet.int96.is.timestamp"; + + private Properties properties = null; + + public ParquetToHiveSchemaConverter() {} + + public ParquetToHiveSchemaConverter(Properties props) { + properties = props; + } + + private static final Log LOG = LogFactory.getLog(ParquetToHiveSchemaConverter.class); + + public StructTypeInfo convert(GroupType parquetSchema) { + return convertFields(parquetSchema.getFields()); + } + + private StructTypeInfo convertFields(List<Type> parquetFields) { + StructTypeInfo structTypeInfo = new StructTypeInfo(); + ArrayList<String> names = new ArrayList<String>(); + ArrayList<TypeInfo> types = new ArrayList<TypeInfo>(); + + for (Type parquetType : parquetFields) { + TypeInfo type; + if (parquetType.isRepetition(Type.Repetition.REPEATED)) { + type = createHiveArray(parquetType, ""); + } else { + type = convertField(parquetType); + } + + names.add(parquetType.getName()); + types.add(type); + } + + structTypeInfo.setAllStructFieldNames(names); + structTypeInfo.setAllStructFieldTypeInfos(types); + + LOG.info("Hive StructTypeInfo generated from the Parquet schema: " + structTypeInfo); + + return structTypeInfo; + } + + private TypeInfo convertField(final Type parquetType) { + if (parquetType.isPrimitive()) { + final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = + parquetType.asPrimitiveType().getPrimitiveTypeName(); + final OriginalType originalType = parquetType.getOriginalType(); + + if (originalType == OriginalType.DECIMAL) { + final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType() + .getDecimalMetadata(); + return TypeInfoFactory.getDecimalTypeInfo(decimalMetadata.getPrecision(), + decimalMetadata.getScale()); + } + + if (parquetPrimitiveTypeName.equals(PrimitiveType.PrimitiveTypeName.INT96)) { + if (properties == null || !properties.containsKey(INT96_IS_TS_PROPERTY_KEY)) { + throw new UnsupportedOperationException("Parquet's INT96 does not have a valid mapping" + + " to a Hive type.\nIf you want Parquet's INT96 to be mapped to Hive's timestamp," + + " then set '" + INT96_IS_TS_PROPERTY_KEY + "' in the table properties. Otherwise, " + + "provide the Hive schema explicitly in the DDL statement."); + } + } + + return parquetPrimitiveTypeName.convert( + new PrimitiveType.PrimitiveTypeNameConverter<TypeInfo, RuntimeException>() { + @Override + public TypeInfo convertBOOLEAN(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.booleanTypeInfo; + } + + @Override + public TypeInfo convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.intTypeInfo; + } + + @Override + public TypeInfo convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.longTypeInfo; + } + + @Override + public TypeInfo convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.timestampTypeInfo; + } + + @Override + public TypeInfo convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.floatTypeInfo; + } + + @Override + public TypeInfo convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return TypeInfoFactory.doubleTypeInfo; + } + + @Override + public TypeInfo convertFIXED_LEN_BYTE_ARRAY(PrimitiveType.PrimitiveTypeName + primitiveTypeName) { + return TypeInfoFactory.binaryTypeInfo; + } + + @Override + public TypeInfo convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + if (originalType == OriginalType.UTF8 || originalType == OriginalType.ENUM) { + return TypeInfoFactory.stringTypeInfo; + } else { + return TypeInfoFactory.binaryTypeInfo; + } + } + }); + } else { + GroupType parquetGroupType = parquetType.asGroupType(); + OriginalType originalType = parquetGroupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case LIST: + if (parquetGroupType.getFieldCount() != 1) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + Type elementType = parquetGroupType.getType(0); + if (!elementType.isRepetition(Type.Repetition.REPEATED)) { + throw new UnsupportedOperationException("Invalid list type " + parquetGroupType); + } + return createHiveArray(elementType, parquetGroupType.getName()); + case MAP: + case MAP_KEY_VALUE: + if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); + if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || + !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || + mapKeyValType.getFieldCount() != 2) { + throw new UnsupportedOperationException("Invalid map type " + parquetGroupType); + } + Type keyType = mapKeyValType.getType(0); + if (!keyType.isPrimitive() || + !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.BINARY) || + !keyType.getOriginalType().equals(OriginalType.UTF8)) { + throw new UnsupportedOperationException("Map key type must be binary (UTF8): " + + keyType); + } + Type valueType = mapKeyValType.getType(1); + return createHiveMap(convertField(keyType), convertField(valueType)); + case ENUM: + case UTF8: + return TypeInfoFactory.stringTypeInfo; + default: + throw new UnsupportedOperationException("Cannot convert Parquet type " + + parquetType); + } + } else { + // if no original type then it's a record + return createHiveStruct(parquetGroupType.getFields()); + } + } + } + + private TypeInfo createHiveStruct(List<Type> parquetFields) { + List<String> names = new ArrayList<String>(); + List<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + + for (Type field : parquetFields) { + names.add(field.getName()); + typeInfos.add(convertField(field)); + } + + return TypeInfoFactory.getStructTypeInfo(names, typeInfos); + } + + private TypeInfo createHiveMap(TypeInfo keyType, TypeInfo valueType) { + return TypeInfoFactory.getMapTypeInfo(keyType, valueType); + } + + private TypeInfo createHiveArray(Type elementType, String elementName) { + if (elementType.isPrimitive()) { + return TypeInfoFactory.getListTypeInfo(convertField(elementType)); + } else { + final GroupType groupType = elementType.asGroupType(); + final List<Type> groupFields = groupType.getFields(); + if (groupFields.size() > 1 || + (groupFields.size() == 1 && + (elementType.getName().equals("array") || + elementType.getName().equals(elementName + "_tuple")))) { + return TypeInfoFactory.getListTypeInfo(createHiveStruct(groupFields)); + } else { + return TypeInfoFactory.getListTypeInfo(convertField(groupType.getFields().get(0))); + } + } + } +}
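A sketch (not part of the patch) composing the two new classes, mirroring what ParquetHiveSerDe.initialize() does below; the path is a placeholder, and the parquet.int96.is.timestamp entry shows the opt-in required before INT96 columns convert to timestamp instead of throwing UnsupportedOperationException:

import java.util.Properties;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetSchemaReader;
import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;

public class SchemaInferenceDemo {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    // The converter only checks for the key's presence, not its value.
    props.setProperty("parquet.int96.is.timestamp", "true");
    StructTypeInfo hiveSchema = new ParquetToHiveSchemaConverter(props)
        .convert(ParquetSchemaReader.read(new Path("/tmp/demo.parquet")));
    // Prints something like struct<id:int,name:string>, depending on the file.
    System.out.println(hiveSchema.getTypeName());
  }
}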
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index 5d98b69183..43bffd272a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -13,6 +13,8 @@ */ package org.apache.hadoop.hive.ql.io.parquet.serde; +import java.io.IOException; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -23,6 +25,11 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetSchemaReader; +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.FieldNode; import org.apache.hadoop.hive.serde.serdeConstants; @@ -44,6 +51,8 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.parquet.hadoop.ParquetOutputFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * @@ -53,6 +62,8 @@ @SerDeSpec(schemaProps = {serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, ParquetOutputFormat.COMPRESSION}) public class ParquetHiveSerDe extends AbstractSerDe { + public static final Logger LOG = LoggerFactory.getLogger(ParquetHiveSerDe.class.getName()); + public static final Text MAP_KEY = new Text("key"); public static final Text MAP_VALUE = new Text("value"); public static final Text MAP = new Text("map"); @@ -78,28 +89,68 @@ public ParquetHiveSerDe() { @Override public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { - final List<String> columnNames; - final List<TypeInfo> columnTypes; + List<String> columnNames; + List<TypeInfo> columnTypes; // Get column names and sort order final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA); - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList<String>(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); - } - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList<TypeInfo>(); + if (columnNameProperty.length() == 0 && columnTypeProperty.length() == 0) { + final String locationProperty = tbl.getProperty("location", null); + Path parquetFile = locationProperty != null ? getParquetFile(conf, + new Path(locationProperty)) : null; + if (parquetFile == null) { + /* + * Determining the Hive schema failed, but we cannot throw an + * exception here: Hive calls initialize() on the SerDe during + * every call, including calls that update the SerDe properties, + * so if the SerDe were left in a bad state there would be no way + * to update that state. + */ + LOG.error("Failed to infer a Hive schema for the Parquet-backed table.\n" + + "Either provide the schema in the DDL statement,\n" + + "OR make sure the table's location contains at least one Parquet file with the " + + "required metadata."); + columnNames = new ArrayList<String>(); + columnTypes = new ArrayList<TypeInfo>(); + } else { + StructTypeInfo structTypeInfo = null; + try { + structTypeInfo = new ParquetToHiveSchemaConverter(tbl).convert( + ParquetSchemaReader.read(parquetFile)); + } catch (IOException ioe) { + LOG.error(ioe.getMessage(), ioe); + } catch (UnsupportedOperationException ue) { + LOG.error(ue.getMessage(), ue); + } catch (RuntimeException ex) { + LOG.error(ex.getMessage(), ex); + } + if (structTypeInfo == null) { + columnNames = new ArrayList<String>(); + columnTypes = new ArrayList<TypeInfo>(); + } else { + columnNames = structTypeInfo.getAllStructFieldNames(); + columnTypes = structTypeInfo.getAllStructFieldTypeInfos(); + } + } } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList<String>(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); + } + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList<TypeInfo>(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } } if (columnNames.size() != columnTypes.size()) { - throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + - "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + - columnTypes); + LOG.error("ParquetHiveSerde initialization failed. Number of column names and column types " + + "differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); + columnNames = new ArrayList<String>(); + columnTypes = new ArrayList<TypeInfo>(); } // Create row related objects StructTypeInfo completeTypeInfo = @@ -268,4 +319,46 @@ StructTypeInfo prune() { return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(newNames, newTypes); } } + + private Path getParquetFile(Configuration conf, Path loc) { + if (loc == null) { + return null; + } + + Path parquetFile; + try { + parquetFile = getAFile(FileSystem.get(new URI(loc.toString()), conf), loc); + } catch (Exception e) { + LOG.error("Unable to read file from " + loc + ": " + e, e); + parquetFile = null; + } + + return parquetFile; + } + + private Path getAFile(FileSystem fs, Path path) throws IOException { + FileStatus status = fs.getFileStatus(path); + + if (status.isFile()) { + if (status.getLen() > 0) { + return path; + } else { + return null; + } + } + + for (FileStatus childStatus : fs.listStatus(path)) { + Path file = getAFile(fs, childStatus.getPath()); + + if (file != null) { + return file; + } + } + + return null; + } + + @Override + public boolean shouldStoreFieldsInMetastore(Map<String, String> tableParams) { + return true; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 53b9a43631..4008bfef90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1097,6 +1097,8 @@ public void createTable(Table tbl, boolean ifNotExists, if (org.apache.commons.lang3.StringUtils.isBlank(tbl.getDbName())) { tbl.setDbName(SessionState.get().getCurrentDatabase()); } + + setTableLocInTableProperties(tbl); if (tbl.getCols().size() == 0 || tbl.getSd().getColsSize() == 0) { tbl.setFields(HiveMetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(), tbl.getDeserializer())); @@ -1160,6 +1162,32 @@ public void createTable(Table tbl, boolean ifNotExists) throws HiveException { return schema; } + private void setTableLocInTableProperties(Table tbl) throws HiveException, TException { + if (HiveUtils.getStorageHandler(conf, tbl.getParameters().get(META_TABLE_STORAGE)) == null) { + tbl.getTTable().getSd().setLocation(getTablePath(tbl)); + } + } + + private String getTablePath(Table table) throws TException { + Warehouse wh = new Warehouse(conf); + Path tablePath; + + final String location = table.getSd().getLocation(); + if (location == null || location.isEmpty()) { + tablePath = wh.getDefaultTablePath( + getMSC().getDatabase(table.getDbName()), table.getTableName()); + } else { + tablePath = wh.getDnsPath(new Path(location)); + } + + if (tablePath != null) { + LOG.info("Table path is: " + tablePath); + return tablePath.toString(); + } else { + return null; + } + } + /** * Drops table along with the data in it. If the table doesn't exist then it * is a no-op. If ifPurge option is specified it is passed to the
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java new file mode 100644 index 0000000000..e346b0c864 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetToHiveSchemaConverter.java @@ -0,0 +1,405 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.hadoop.hive.ql.io.parquet.convert.ParquetToHiveSchemaConverter; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.parquet.schema.*; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +public class TestParquetToHiveSchemaConverter { + + static ParquetToHiveSchemaConverter parquetToHiveSchemaConverter; + + @BeforeClass + public static void setUp() { + parquetToHiveSchemaConverter = new ParquetToHiveSchemaConverter(); + } + + @Test + public void testUtf8() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("utf8"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("utf8"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.stringTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testEnum() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.ENUM). + named("enum"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("enum"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.stringTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testBinary() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + named("binary"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("binary"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.binaryTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testBoolean() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.BOOLEAN). + named("boolean"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("boolean"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.booleanTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testDouble() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.DOUBLE). + named("double"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("double"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.doubleTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testFixedLenByteArray() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY). + length(8). + named("fixedLenByteArray"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("fixedLenByteArray"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.binaryTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testFloat() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.FLOAT). + named("float"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("float"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.floatTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt32() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.INT32). + named("int32"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("int32"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.intTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt64() { + GroupType groupType = Types.repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.INT64). + named("int64"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("int64"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.longTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void testInt96() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.INT96). + named("int96"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("int96"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.timestampTypeInfo); + + Properties props = new Properties(); + props.setProperty("parquet.int96.is.timestamp", ""); + + test(groupType, names, typeInfos, new ParquetToHiveSchemaConverter(props)); + } + + @Test + public void testInt96Negative() { + GroupType groupType = Types.requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.INT96). + named("int96"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("int96"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.timestampTypeInfo); + + try { + test(groupType, names, typeInfos); + Assert.fail("Failed to throw UnsupportedOperationException for INT96"); + } catch (UnsupportedOperationException use) { + // It's good! + } + } + + @Test + public void primitiveOptional() { + GroupType groupType = Types.optionalGroup(). + optional(PrimitiveType.PrimitiveTypeName.INT32). + named("int32"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("int32"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.intTypeInfo); + + test(groupType, names, typeInfos); + } + + @Test + public void listOfPrimitives() { + GroupType groupType = Types.requiredGroup(). + repeated(PrimitiveType.PrimitiveTypeName.INT32). + named("intlist"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("intlist"); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo)); + + test(groupType, names, typeInfos); + } + + @Test + public void listOfStruct() { + GroupType groupType = Types.requiredGroup(). + repeatedGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("string"). + required(PrimitiveType.PrimitiveTypeName.FLOAT). + named("float"). + named("structlist"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("structlist"); + final List<TypeInfo> structTypeInfos = new ArrayList<TypeInfo>(); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.floatTypeInfo); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getStructTypeInfo( + Arrays.asList("string", "float"), structTypeInfos))); + + test(groupType, names, typeInfos); + } + + @Test + public void structOfPrimitives() { + GroupType groupType = Types.requiredGroup(). + requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("utf8"). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.ENUM). + named("enum"). + required(PrimitiveType.PrimitiveTypeName.BINARY). + named("binary"). + required(PrimitiveType.PrimitiveTypeName.BOOLEAN). + named("boolean"). + required(PrimitiveType.PrimitiveTypeName.DOUBLE). + named("double"). + required(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY). + length(8). + named("fixedLenByteArray"). + required(PrimitiveType.PrimitiveTypeName.FLOAT). + named("float"). + required(PrimitiveType.PrimitiveTypeName.INT32). + named("int32"). + required(PrimitiveType.PrimitiveTypeName.INT64). + named("int64"). + named("struct"). + named("top"); + + ArrayList<String> names = new ArrayList<String>(); + names.add("struct"); + final List<TypeInfo> structTypeInfos = new ArrayList<TypeInfo>(); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.stringTypeInfo); + structTypeInfos.add(TypeInfoFactory.binaryTypeInfo); + structTypeInfos.add(TypeInfoFactory.booleanTypeInfo); + structTypeInfos.add(TypeInfoFactory.doubleTypeInfo); + structTypeInfos.add(TypeInfoFactory.binaryTypeInfo); + structTypeInfos.add(TypeInfoFactory.floatTypeInfo); + structTypeInfos.add(TypeInfoFactory.intTypeInfo); + structTypeInfos.add(TypeInfoFactory.longTypeInfo); + ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + typeInfos.add(TypeInfoFactory.getStructTypeInfo( + Arrays.asList("utf8", "enum", "binary", "boolean", "double", "fixedLenByteArray", "float", + "int32", "int64"), + structTypeInfos)); + + test(groupType, names, typeInfos); + } + + @Test + public void nestedStruct() { + GroupType groupType = Types.requiredGroup(). + requiredGroup(). + requiredGroup(). + required(PrimitiveType.PrimitiveTypeName.INT32). + named("int"). + required(PrimitiveType.PrimitiveTypeName.BINARY). + as(OriginalType.UTF8). + named("string"). + named("innerstruct"). + named("outerstruct"). + named("top"); + + final ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(Arrays.asList( + TypeInfoFactory.getStructTypeInfo(Arrays.asList("innerstruct"), Arrays.asList( + TypeInfoFactory.getStructTypeInfo(new ArrayList<String>(Arrays.asList("int", "string")), + new ArrayList<TypeInfo>(Arrays.asList( + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo))) + )) + )); + + test(groupType, new ArrayList<String>(Arrays.asList("outerstruct")), typeInfos); + } + + @Test + public void map() { + GroupType groupType = Types.requiredGroup(). + addField( + ConversionPatterns.mapType( + Type.Repetition.REQUIRED, + "map", + Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("key"), + Types.required(PrimitiveType.PrimitiveTypeName.DOUBLE).named("value")) + ).named("top"); + + final ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(Arrays.asList( + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.doubleTypeInfo) + )); + + test(groupType, new ArrayList<String>(Arrays.asList("map")), typeInfos); + } + + @Test + public void nestedMap() { + GroupType groupType = Types.requiredGroup(). + addField( + ConversionPatterns.mapType( + Type.Repetition.REQUIRED, + "map", + Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("key"), + ConversionPatterns.mapType( + Type.Repetition.REQUIRED, + "value", + Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("key"), + Types.required(PrimitiveType.PrimitiveTypeName.INT64).named("value") + ) + ) + ).named("top"); + + final ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(Arrays.asList( + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.longTypeInfo)) + )); + + test(groupType, new ArrayList<String>(Arrays.asList("map")), typeInfos); + } + + private void test(GroupType groupType, ArrayList<String> names, ArrayList<TypeInfo> typeInfos) { + test(groupType, names, typeInfos, parquetToHiveSchemaConverter); + } + + private void test(GroupType groupType, ArrayList<String> names, ArrayList<TypeInfo> typeInfos, + ParquetToHiveSchemaConverter converter) { + StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldNames(names); + structTypeInfo.setAllStructFieldTypeInfos(typeInfos); + + final StructTypeInfo actualTypeInfo = converter.convert(groupType); + Assert.assertEquals(structTypeInfo, actualTypeInfo); + } +} \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q new file mode 100644 index 0000000000..bb3872eead --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_multi_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/MultiFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_multi_field_structs_gen_schema; + +CREATE EXTERNAL TABLE parquet_array_of_multi_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_multi_field_structs_gen_schema'; + +SELECT * FROM parquet_array_of_multi_field_structs_gen_schema; + +DROP TABLE parquet_array_of_multi_field_structs_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q new file mode 100644 index 0000000000..21cc9e2e0c --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_optional_elements_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_optional_elements_gen_schema; +dfs -cp 
${system:hive.root}/data/files/NewOptionalGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_optional_elements_gen_schema; + +CREATE TABLE parquet_array_of_optional_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_optional_elements_gen_schema'; + +SELECT * FROM parquet_array_of_optional_elements_gen_schema; + +DROP TABLE parquet_array_of_optional_elements_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q new file mode 100644 index 0000000000..c71cbe3c60 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_required_elements_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_required_elements_gen_schema; +dfs -cp ${system:hive.root}/data/files/NewRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_required_elements_gen_schema; + +CREATE TABLE parquet_array_of_required_elements_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_required_elements_gen_schema'; + +SELECT * FROM parquet_array_of_required_elements_gen_schema; + +DROP TABLE parquet_array_of_required_elements_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000000..2de661c46c --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_ambiguous_array_of_single_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/SingleFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_ambiguous_array_of_single_field_structs_gen_schema; + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_ambiguous_array_of_single_field_structs_gen_schema'; + +SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q new file mode 100644 index 0000000000..1ae564c20d --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/HiveRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_array_of_structs_gen_schema; + +CREATE TABLE 
parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_structs_gen_schema'; + +SELECT * FROM parquet_array_of_structs_gen_schema; + +DROP TABLE parquet_array_of_structs_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q b/ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q new file mode 100644 index 0000000000..17a6e22230 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_structs_gen_schema_ext.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp; +dfs -cp ${system:hive.root}/data/files/HiveRequiredGroupInList.parquet ${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp; + +CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_decimal_gen_schema_ext_tmp'; + +SELECT * FROM parquet_array_of_structs_gen_schema_ext; + +DROP TABLE parquet_array_of_structs_gen_schema_ext; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q new file mode 100644 index 0000000000..30078c9bdd --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_unannotated_groups_gen_schema; +dfs -cp ${system:hive.root}/data/files/UnannotatedListOfGroups.parquet ${system:test.tmp.dir}/parquet_array_of_unannotated_groups_gen_schema; + +CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_unannotated_groups_gen_schema'; + +SELECT * FROM parquet_array_of_unannotated_groups_gen_schema; + +DROP TABLE parquet_array_of_unannotated_groups_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q new file mode 100644 index 0000000000..29aa95ac9a --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_array_of_unannotated_ints_gen_schema; +dfs -cp ${system:hive.root}/data/files/UnannotatedListOfPrimitives.parquet ${system:test.tmp.dir}/parquet_array_of_unannotated_ints_gen_schema; + +CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_array_of_unannotated_ints_gen_schema'; + +SELECT * FROM parquet_array_of_unannotated_ints_gen_schema; + +DROP TABLE parquet_array_of_unannotated_ints_gen_schema; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q new file mode 100644 index 0000000000..ee64e434bf --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_avro_array_of_primitives_gen_schema; +dfs -cp ${system:hive.root}/data/files/AvroPrimitiveInList.parquet ${system:test.tmp.dir}/parquet_avro_array_of_primitives_gen_schema; + +CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_avro_array_of_primitives_gen_schema'; + +SELECT * FROM parquet_avro_array_of_primitives_gen_schema; + +DROP TABLE parquet_avro_array_of_primitives_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000000..2dbb9a55f9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_avro_array_of_single_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/AvroSingleFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_avro_array_of_single_field_structs_gen_schema; + +CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_avro_array_of_single_field_structs_gen_schema'; + +SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q new file mode 100644 index 0000000000..1176126d6b --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_decimal_gen_schema.q @@ -0,0 +1,18 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_decimal_gen_schema; +dfs -cp ${system:hive.root}/data/files/dec.parq ${system:test.tmp.dir}/parquet_decimal_gen_schema; + +CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_decimal_gen_schema'; + +DESCRIBE FORMATTED parquet_decimal_gen_schema; + +SELECT * FROM parquet_decimal_gen_schema; + +DROP TABLE parquet_decimal_gen_schema; \ 
No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q new file mode 100644 index 0000000000..1f83684fac --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_thrift_array_of_primitives_gen_schema; +dfs -cp ${system:hive.root}/data/files/ThriftPrimitiveInList.parquet ${system:test.tmp.dir}/parquet_thrift_array_of_primitives_gen_schema; + +CREATE TABLE parquet_thrift_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_thrift_array_of_primitives_gen_schema'; + +SELECT * FROM parquet_thrift_array_of_primitives_gen_schema; + +DROP TABLE parquet_thrift_array_of_primitives_gen_schema; diff --git a/ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q new file mode 100644 index 0000000000..4cbd95cd24 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q @@ -0,0 +1,16 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_thrift_array_of_single_field_structs_gen_schema; +dfs -cp ${system:hive.root}/data/files/ThriftSingleFieldGroupInList.parquet ${system:test.tmp.dir}/parquet_thrift_array_of_single_field_structs_gen_schema; + +CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '${system:test.tmp.dir}/parquet_thrift_array_of_single_field_structs_gen_schema'; + +SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema; + +DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema; diff --git a/ql/src/test/results/clientpositive/llap/parquet_array_map_emptynullvals.q.out b/ql/src/test/results/clientpositive/llap/parquet_array_map_emptynullvals.q.out index 46086072fb..4b23de84dd 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_array_map_emptynullvals.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_array_map_emptynullvals.q.out @@ -57,9 +57,9 @@ POSTHOOK: query: insert into table testSets2 select * from testSets POSTHOOK: type: QUERY POSTHOOK: Input: default@testsets POSTHOOK: Output: default@testsets2 -POSTHOOK: Lineage: testsets2.arrayvalues SIMPLE [(testsets)testsets.FieldSchema(name:arrayvalues, type:array, comment:null), ] -POSTHOOK: Lineage: testsets2.key SIMPLE [(testsets)testsets.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: testsets2.mapvalues SIMPLE [(testsets)testsets.FieldSchema(name:mapvalues, type:map, comment:null), ] +POSTHOOK: Lineage: testsets2.arrayvalues SIMPLE [(testsets)testsets.FieldSchema(name:arrayvalues, type:array, comment:), ] +POSTHOOK: Lineage: testsets2.key SIMPLE [(testsets)testsets.FieldSchema(name:key, type:string, comment:), ] +POSTHOOK: Lineage: testsets2.mapvalues SIMPLE 
[(testsets)testsets.FieldSchema(name:mapvalues, type:map, comment:), ] PREHOOK: query: select * from testSets2 PREHOOK: type: QUERY PREHOOK: Input: default@testsets2 diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_of_arrays_of_ints.q.out b/ql/src/test/results/clientpositive/llap/parquet_map_of_arrays_of_ints.q.out index d76f92e422..27a8eb7c51 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_map_of_arrays_of_ints.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_map_of_arrays_of_ints.q.out @@ -30,7 +30,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@parquet_map_of_arrays_of_ints POSTHOOK: Output: database:default POSTHOOK: Output: default@parquet_map_of_arrays_of_ints_copy -POSTHOOK: Lineage: parquet_map_of_arrays_of_ints_copy.examples SIMPLE [(parquet_map_of_arrays_of_ints)parquet_map_of_arrays_of_ints.FieldSchema(name:examples, type:map>, comment:null), ] +POSTHOOK: Lineage: parquet_map_of_arrays_of_ints_copy.examples SIMPLE [(parquet_map_of_arrays_of_ints)parquet_map_of_arrays_of_ints.FieldSchema(name:examples, type:map>, comment:), ] PREHOOK: query: SELECT * FROM parquet_map_of_arrays_of_ints_copy PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_of_arrays_of_ints_copy diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_of_maps.q.out b/ql/src/test/results/clientpositive/llap/parquet_map_of_maps.q.out index 242fa40e22..9bb334d022 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_map_of_maps.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_map_of_maps.q.out @@ -30,7 +30,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@parquet_map_of_maps POSTHOOK: Output: database:default POSTHOOK: Output: default@parquet_map_of_maps_copy -POSTHOOK: Lineage: parquet_map_of_maps_copy.map_of_maps SIMPLE [(parquet_map_of_maps)parquet_map_of_maps.FieldSchema(name:map_of_maps, type:map>, comment:null), ] +POSTHOOK: Lineage: parquet_map_of_maps_copy.map_of_maps SIMPLE [(parquet_map_of_maps)parquet_map_of_maps.FieldSchema(name:map_of_maps, type:map>, comment:), ] PREHOOK: query: SELECT * FROM parquet_map_of_maps_copy PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_of_maps_copy diff --git a/ql/src/test/results/clientpositive/llap/parquet_mixed_partition_formats2.q.out b/ql/src/test/results/clientpositive/llap/parquet_mixed_partition_formats2.q.out index d32d782fc8..54e99697ea 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_mixed_partition_formats2.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_mixed_partition_formats2.q.out @@ -87,9 +87,9 @@ POSTHOOK: Input: default@parquet_table_json_partition POSTHOOK: Input: default@parquet_table_json_partition@ts=20150101 POSTHOOK: Output: database:default POSTHOOK: Output: default@new_table -POSTHOOK: Lineage: new_table.address SIMPLE [(parquet_table_json_partition)parquet_table_json_partition.FieldSchema(name:address, type:struct, comment:from deserializer), ] -POSTHOOK: Lineage: new_table.id SIMPLE [(parquet_table_json_partition)parquet_table_json_partition.FieldSchema(name:id, type:bigint, comment:from deserializer), ] -POSTHOOK: Lineage: new_table.reports SIMPLE [(parquet_table_json_partition)parquet_table_json_partition.FieldSchema(name:reports, type:array, comment:from deserializer), ] +POSTHOOK: Lineage: new_table.address SIMPLE [(parquet_table_json_partition)parquet_table_json_partition.FieldSchema(name:address, type:struct, comment:), ] +POSTHOOK: Lineage: new_table.id SIMPLE 
[(parquet_table_json_partition)parquet_table_json_partition.FieldSchema(name:id, type:bigint, comment:), ] +POSTHOOK: Lineage: new_table.reports SIMPLE [(parquet_table_json_partition)parquet_table_json_partition.FieldSchema(name:reports, type:array, comment:), ] POSTHOOK: Lineage: new_table.ts SIMPLE [(parquet_table_json_partition)parquet_table_json_partition.FieldSchema(name:ts, type:string, comment:null), ] PREHOOK: query: SELECT * FROM new_table ORDER BY address, reports PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/parquet_schema_evolution.q.out b/ql/src/test/results/clientpositive/llap/parquet_schema_evolution.q.out index 1274165dbe..adf3701f91 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_schema_evolution.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_schema_evolution.q.out @@ -90,8 +90,8 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@newstructfield POSTHOOK: Output: database:default POSTHOOK: Output: default@NewStructFieldTable -POSTHOOK: Lineage: newstructfieldtable.a SIMPLE [(newstructfield)newstructfield.FieldSchema(name:a, type:struct,a2:struct,a3:int>, comment:null), ] -POSTHOOK: Lineage: newstructfieldtable.b SIMPLE [(newstructfield)newstructfield.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: newstructfieldtable.a SIMPLE [(newstructfield)newstructfield.FieldSchema(name:a, type:struct,a2:struct,a3:int>, comment:), ] +POSTHOOK: Lineage: newstructfieldtable.b SIMPLE [(newstructfield)newstructfield.FieldSchema(name:b, type:int, comment:), ] PREHOOK: query: DESCRIBE NewStructFieldTable PREHOOK: type: DESCTABLE PREHOOK: Input: default@newstructfieldtable diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out index 4467b5a29e..53477f1c30 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out @@ -14,18 +14,18 @@ POSTHOOK: query: insert overwrite table alltypesparquet_part partition (ds='2011 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet POSTHOOK: Output: default@alltypesparquet_part@ds=2011 -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, 
type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2011).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:), ] PREHOOK: query: insert overwrite table alltypesparquet_part partition (ds='2012') select * from alltypesparquet limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesparquet @@ -34,18 +34,18 @@ POSTHOOK: query: insert overwrite table alltypesparquet_part partition (ds='2012 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet POSTHOOK: Output: default@alltypesparquet_part@ds=2012 -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cboolean2 SIMPLE 
[(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, 
comment:), ]
 PREHOOK: query: select count(cdouble), cint from alltypesparquet_part where ds='2011' group by cint limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesparquet_part
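The golden-file churn above and below is mechanical: every lineage record that used to end in "comment:null" now ends in "comment:" (an empty comment), and the table_storage.q.out hunks further down show the same default surfacing in SHOW CREATE TABLE as explicit COMMENT '' clauses. A minimal sketch of the observable difference, assuming the new output simply reflects column comments defaulting to the empty string instead of null when the schema is materialized; withDefaultComment is a hypothetical helper for illustration, not code from this patch:

    import org.apache.hadoop.hive.metastore.api.FieldSchema;

    public class CommentDefaultSketch {
      // Hypothetical normalization: a null column comment becomes "", which is
      // exactly the change these .q.out files record ("comment:null" -> "comment:").
      static FieldSchema withDefaultComment(String name, String type, String comment) {
        return new FieldSchema(name, type, comment == null ? "" : comment);
      }

      public static void main(String[] args) {
        // Thrift's generated toString prints "FieldSchema(name:cint, type:int, comment:)",
        // matching the updated golden output.
        System.out.println(withDefaultComment("cint", "int", null));
      }
    }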
diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_varchar.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_varchar.q.out
index 120821786a..3f673c1861 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_varchar.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_varchar.q.out
@@ -14,18 +14,18 @@ POSTHOOK: query: insert overwrite table alltypesparquet_part_varchar partition (
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesparquet
 POSTHOOK: Output: default@alltypesparquet_part_varchar@ds=2011
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2011).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:), ]
 PREHOOK: query: insert overwrite table alltypesparquet_part_varchar partition (ds='2012') select * from alltypesparquet limit 100
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesparquet
@@ -34,18 +34,18 @@ POSTHOOK: query: insert overwrite table alltypesparquet_part_varchar partition (
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesparquet
 POSTHOOK: Output: default@alltypesparquet_part_varchar@ds=2012
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:null), ]
-POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).ctimestamp1 SIMPLE
[(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_varchar PARTITION(ds=2012).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:), ] PREHOOK: query: select count(cdouble), cint from alltypesparquet_part_varchar where ds='2011' group by cint limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesparquet_part_varchar diff --git a/ql/src/test/results/clientpositive/llap/table_storage.q.out b/ql/src/test/results/clientpositive/llap/table_storage.q.out index 76441ba1b8..e2e20e6e95 100644 --- a/ql/src/test/results/clientpositive/llap/table_storage.q.out +++ b/ql/src/test/results/clientpositive/llap/table_storage.q.out @@ -393,8 +393,8 @@ POSTHOOK: query: SHOW CREATE TABLE t POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@t CREATE TABLE `t`( - `key` string, - `val` string) + `key` string COMMENT '', + `val` string COMMENT '') ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT @@ -439,8 +439,8 @@ POSTHOOK: query: SHOW CREATE TABLE t POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@t CREATE TABLE `t`( - `key` string, - `val` string) + `key` string COMMENT '', + `val` string COMMENT '') 
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT @@ -484,8 +484,8 @@ POSTHOOK: query: SHOW CREATE TABLE t POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@t CREATE TABLE `t`( - `key` string, - `val` string) + `key` string COMMENT '', + `val` string COMMENT '') ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED AS INPUTFORMAT @@ -531,8 +531,8 @@ POSTHOOK: query: SHOW CREATE TABLE t POSTHOOK: type: SHOW_CREATETABLE POSTHOOK: Input: default@t CREATE TABLE `t`( - `key` string, - `val` string) + `key` string COMMENT '', + `val` string COMMENT '') ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' WITH SERDEPROPERTIES ( diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 73346ed057..a097bf0b33 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -464,17 +464,17 @@ select * from parquet_types_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types_n2 POSTHOOK: Output: default@parquet_type_nodict -POSTHOOK: Lineage: parquet_type_nodict.cbinary SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cbinary, type:binary, comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.cchar SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cchar, type:char(5), comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.cdecimal SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cdecimal, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.cdouble SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cdouble, type:double, comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.cfloat SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cfloat, type:float, comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.cint SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cint, type:int, comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.csmallint SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:csmallint, type:smallint, comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.cstring1 SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cstring1, type:string, comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.ctinyint SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.cvarchar SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] -POSTHOOK: Lineage: parquet_type_nodict.t SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:t, type:timestamp, comment:null), ] +POSTHOOK: Lineage: parquet_type_nodict.cbinary SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cbinary, type:binary, comment:), ] +POSTHOOK: Lineage: parquet_type_nodict.cchar SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cchar, type:char(5), comment:), ] +POSTHOOK: Lineage: parquet_type_nodict.cdecimal SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cdecimal, type:decimal(4,2), comment:), ] +POSTHOOK: Lineage: parquet_type_nodict.cdouble SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cdouble, type:double, comment:), ] +POSTHOOK: Lineage: parquet_type_nodict.cfloat SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cfloat, 
type:float, comment:), ]
+POSTHOOK: Lineage: parquet_type_nodict.cint SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cint, type:int, comment:), ]
+POSTHOOK: Lineage: parquet_type_nodict.csmallint SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:csmallint, type:smallint, comment:), ]
+POSTHOOK: Lineage: parquet_type_nodict.cstring1 SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cstring1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet_type_nodict.ctinyint SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:ctinyint, type:tinyint, comment:), ]
+POSTHOOK: Lineage: parquet_type_nodict.cvarchar SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:cvarchar, type:varchar(10), comment:), ]
+POSTHOOK: Lineage: parquet_type_nodict.t SIMPLE [(parquet_types_n2)parquet_types_n2.FieldSchema(name:t, type:timestamp, comment:), ]
 PREHOOK: query: explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_type_nodict
diff --git a/ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out
new file mode 100644
index 0000000000..4c02ebe6a2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_multi_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema
+POSTHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_multi_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema
+PREHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema
+#### A masked pattern was here ####
+[{"latitude":0.0,"longitude":0.0},{"latitude":0.0,"longitude":180.0}]
+PREHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema
+PREHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema
+POSTHOOK: query: DROP TABLE parquet_array_of_multi_field_structs_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_multi_field_structs_gen_schema
+POSTHOOK: Output: default@parquet_array_of_multi_field_structs_gen_schema
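The new .q.out files starting above exercise the headline behavior of this patch: CREATE TABLE statements that declare no columns at all, leaving Hive to derive the schema from the Parquet data at the table's LOCATION. This first case, an external table over pre-existing data, reads an array of two-field groups back as array<struct<latitude:double,longitude:double>>; the files after it cover optional elements (where nulls must survive, as the [...,null,...] row just below shows), required elements, managed and external variants, legacy unannotated lists, and avro/thrift writer layouts. A sketch of the mapping being pinned down, assuming the test file uses the standard three-level LIST layout; the field names spark_schema, locations, list, and element are illustrative guesses, not taken from the patch:

    import java.util.Arrays;

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.parquet.schema.MessageType;
    import org.apache.parquet.schema.OriginalType;
    import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
    import org.apache.parquet.schema.Types;

    public class ListMappingSketch {
      public static void main(String[] args) {
        // Assumed Parquet shape: a three-level LIST whose element is a group
        // with two optional double fields.
        MessageType parquetSchema = Types.buildMessage()
            .optionalGroup().as(OriginalType.LIST)
              .repeatedGroup()
                .optionalGroup()
                  .optional(PrimitiveTypeName.DOUBLE).named("latitude")
                  .optional(PrimitiveTypeName.DOUBLE).named("longitude")
                .named("element")
              .named("list")
            .named("locations")
            .named("spark_schema");

        // The Hive type the golden output implies for that shape.
        TypeInfo element = TypeInfoFactory.getStructTypeInfo(
            Arrays.asList("latitude", "longitude"),
            Arrays.<TypeInfo>asList(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.doubleTypeInfo));
        TypeInfo hiveType = TypeInfoFactory.getListTypeInfo(element);

        System.out.println(parquetSchema);
        // Prints: array<struct<latitude:double,longitude:double>>
        System.out.println(hiveType.getTypeName());
      }
    }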
diff --git a/ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out
new file mode 100644
index 0000000000..0de5b6520e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_array_of_optional_elements_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_array_of_optional_elements_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_array_of_optional_elements_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
+PREHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_array_of_optional_elements_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+#### A masked pattern was here ####
+[{"latitude":0.0,"longitude":0.0},null,{"latitude":0.0,"longitude":180.0}]
+PREHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+PREHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
+POSTHOOK: query: DROP TABLE parquet_array_of_optional_elements_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_array_of_optional_elements_gen_schema
+POSTHOOK: Output: default@parquet_array_of_optional_elements_gen_schema
diff --git a/ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out
new file mode 100644
index 0000000000..4303f0f234
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_array_of_required_elements_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_array_of_required_elements_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_required_elements_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_required_elements_gen_schema +PREHOOK: Output: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_required_elements_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_required_elements_gen_schema +POSTHOOK: Output: default@parquet_array_of_required_elements_gen_schema diff --git a/ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000000..e9ca9abb72 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: CREATE TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[1234,2345] +PREHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +PREHOOK: 
Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs_gen_schema diff --git a/ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out new file mode 100644 index 0000000000..c9b1f3fd44 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: CREATE TABLE parquet_array_of_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_structs_gen_schema +PREHOOK: Output: default@parquet_array_of_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema diff --git a/ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out b/ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out new file mode 100644 index 0000000000..30acdaee55 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_structs_gen_schema_ext.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE EXTERNAL TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: query: CREATE EXTERNAL 
TABLE parquet_array_of_structs_gen_schema_ext +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +PREHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs_gen_schema_ext +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema_ext +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +PREHOOK: Output: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: query: DROP TABLE parquet_array_of_structs_gen_schema_ext +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs_gen_schema_ext +POSTHOOK: Output: default@parquet_array_of_structs_gen_schema_ext diff --git a/ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out new file mode 100644 index 0000000000..dd738dd237 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: CREATE TABLE parquet_array_of_unannotated_groups_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +#### A masked pattern was here #### +[{"x":1.0,"y":1.0},{"x":2.0,"y":2.0}] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +PREHOOK: Output: 
default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_groups_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_groups_gen_schema diff --git a/ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out new file mode 100644 index 0000000000..14b52ba7b0 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: CREATE TABLE parquet_array_of_unannotated_ints_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +PREHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_ints_gen_schema +POSTHOOK: Output: default@parquet_array_of_unannotated_ints_gen_schema diff --git a/ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out new file mode 100644 index 0000000000..2cad677d75 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_avro_array_of_primitives_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: 
default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: CREATE TABLE parquet_avro_array_of_primitives_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_primitives_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_primitives_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_primitives_gen_schema diff --git a/ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out new file mode 100644 index 0000000000..c285687350 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct_gen_schema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: CREATE TABLE parquet_avro_array_of_single_field_structs_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE 
parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs_gen_schema +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs_gen_schema diff --git a/ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out new file mode 100644 index 0000000000..f2ff8fe615 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_decimal_gen_schema.q.out @@ -0,0 +1,84 @@ +PREHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: CREATE TABLE parquet_decimal_gen_schema +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_decimal_gen_schema +PREHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +POSTHOOK: query: DESCRIBE FORMATTED parquet_decimal_gen_schema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@parquet_decimal_gen_schema +# col_name data_type comment +name string +value decimal(5,2) + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + bucketing_version 2 + numFiles 1 + totalSize 335 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: SELECT * FROM parquet_decimal_gen_schema +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_decimal_gen_schema +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_decimal_gen_schema +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_decimal_gen_schema +#### A masked pattern was here #### +Tom 234.79 +Beck 77.34 +Snow 55.71 +Mary 4.33 +Cluck 5.96 +Tom 12.25 +Mary 33.33 +Tom 0.19 +Beck 3.15 +Beck 7.99 +PREHOOK: query: DROP TABLE parquet_decimal_gen_schema +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_decimal_gen_schema +PREHOOK: Output: default@parquet_decimal_gen_schema +POSTHOOK: query: DROP TABLE parquet_decimal_gen_schema +POSTHOOK: type: 
DROPTABLE
+POSTHOOK: Input: default@parquet_decimal_gen_schema
+POSTHOOK: Output: default@parquet_decimal_gen_schema
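Two details are worth noting at this point. First, the DESCRIBE FORMATTED output above shows the inferred column as decimal(5,2): Parquet's DECIMAL annotation carries precision and scale, so schema inference can reproduce the exact Hive decimal type rather than a generic one. Second, the single-field-struct files bracket an inherent ambiguity: a repeated group with a single field can legitimately mean either array<struct<count:int>> or array<int>, and the golden files record both readings — the earlier "ambiguous" table returned [1234,2345], while the avro-style file (and the thrift-style one that follows) returns [{"count":1234},{"count":2345}]. The resolution rule itself is not visible in these hunks; the sketch below, offered under that reading, only constructs the decimal case and the two candidate list types:

    import java.util.Arrays;

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class InferredTypeSketch {
      public static void main(String[] args) {
        // DECIMAL metadata (precision 5, scale 2) maps to an exact Hive type.
        TypeInfo decimal = TypeInfoFactory.getDecimalTypeInfo(5, 2);
        System.out.println(decimal.getTypeName()); // decimal(5,2)

        // The two legitimate readings of a repeated single-field group:
        TypeInfo asStructs = TypeInfoFactory.getListTypeInfo(
            TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("count"),
                Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo)));
        TypeInfo asInts = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo);

        System.out.println(asStructs.getTypeName()); // array<struct<count:int>> -> [{"count":1234},...]
        System.out.println(asInts.getTypeName());    // array<int>               -> [1234,2345]
      }
    }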
diff --git a/ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out
new file mode 100644
index 0000000000..c7a21dd54e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_thrift_array_of_primitives_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_thrift_array_of_primitives_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+#### A masked pattern was here ####
+[34,35,36]
+PREHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+PREHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: query: DROP TABLE parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_thrift_array_of_primitives_gen_schema
+POSTHOOK: Output: default@parquet_thrift_array_of_primitives_gen_schema
diff --git a/ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out b/ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out
new file mode 100644
index 0000000000..0218f259c2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct_gen_schema.q.out
@@ -0,0 +1,43 @@
+PREHOOK: query: CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: query: CREATE TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+#### A masked pattern was here ####
+[{"count":1234},{"count":2345}]
+PREHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs_gen_schema
+POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs_gen_schema
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
index 7ba21a9722..16b7295847 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
@@ -8,10 +8,10 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@alltypesparquet
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@date_decimal_test_parquet
-POSTHOOK: Lineage: date_decimal_test_parquet.cdate EXPRESSION [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ]
-POSTHOOK: Lineage: date_decimal_test_parquet.cdecimal EXPRESSION [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ]
-POSTHOOK: Lineage: date_decimal_test_parquet.cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ]
-POSTHOOK: Lineage: date_decimal_test_parquet.cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: date_decimal_test_parquet.cdate EXPRESSION [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ]
+POSTHOOK: Lineage: date_decimal_test_parquet.cdecimal EXPRESSION [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ]
+POSTHOOK: Lineage: date_decimal_test_parquet.cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ]
+POSTHOOK: Lineage: date_decimal_test_parquet.cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ]
 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test_parquet where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@date_decimal_test_parquet
diff --git
a/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out index e8ad131220..82383a0210 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out @@ -14,18 +14,18 @@ POSTHOOK: query: insert overwrite table alltypesparquet_part_n0 partition (ds='2 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet POSTHOOK: Output: default@alltypesparquet_part_n0@ds=2011 -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cint SIMPLE 
[(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:), ] +POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2011).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:), ] PREHOOK: query: insert overwrite table alltypesparquet_part_n0 partition (ds='2012') select * from alltypesparquet order by ctinyint, cint, cbigint limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesparquet @@ -34,18 +34,18 @@ POSTHOOK: query: insert overwrite table alltypesparquet_part_n0 partition (ds='2 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet POSTHOOK: Output: default@alltypesparquet_part_n0@ds=2012 -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] -POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).ctinyint SIMPLE 
[(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cbigint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cbigint, type:bigint, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean1, type:boolean, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cboolean2, type:boolean, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cdouble SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cdouble, type:double, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cfloat SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cfloat, type:float, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cint, type:int, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).csmallint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:csmallint, type:smallint, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cstring1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring1, type:string, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).cstring2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:cstring2, type:string, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp1, type:timestamp, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctimestamp2, type:timestamp, comment:), ]
+POSTHOOK: Lineage: alltypesparquet_part_n0 PARTITION(ds=2012).ctinyint SIMPLE [(alltypesparquet)alltypesparquet.FieldSchema(name:ctinyint, type:tinyint, comment:), ]
 PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesparquet_part_n0 order by c1 limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesparquet_part_n0
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 22140a36f7..fcc48d7ad7 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -4310,6 +4310,12 @@ private boolean createLocationForAddedPartition(
     boolean result = false;
     if (partLocation != null) {
+      if (part.getSd() == null) {
+        // If partition does not have a storage descriptor, get one from table
+        // Using deepCopy as different partitions of a table
+        // can have different storage descriptors.
+        part.setSd(tbl.getSd().deepCopy());
+      }
       part.getSd().setLocation(partLocation.toString());
       // Check to see if the directory already exists before calling
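The HiveMetaStore hunk above closes a gap for add-partition calls that supply a location but no storage descriptor: previously part.getSd() would be null and setLocation would throw. The copy also has to be deep, because partitions of one table may diverge (formats, serde properties, and of course locations), so they must not alias a single shared SD. A standalone sketch of the aliasing hazard the in-code comment alludes to, using only the thrift-generated metastore API; this is an illustration, not code from the patch:

    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

    public class SdCopySketch {
      public static void main(String[] args) {
        StorageDescriptor tableSd = new StorageDescriptor();
        tableSd.setLocation("hdfs:///warehouse/t");

        Partition p1 = new Partition();
        Partition p2 = new Partition();

        // Wrong: both partitions alias the table's SD instance...
        p1.setSd(tableSd);
        p2.setSd(tableSd);
        p2.getSd().setLocation("hdfs:///warehouse/t/ds=2012");
        // ...so p1 (and the table) silently report ds=2012 as well.
        System.out.println(p1.getSd().getLocation());

        // Right (what the patch does): give each partition its own deep copy.
        tableSd.setLocation("hdfs:///warehouse/t");
        p1.setSd(tableSd.deepCopy());
        p1.getSd().setLocation("hdfs:///warehouse/t/ds=2011");
        p2.setSd(tableSd.deepCopy());
        p2.getSd().setLocation("hdfs:///warehouse/t/ds=2012");
        System.out.println(tableSd.getLocation()); // table SD unchanged this time
      }
    }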