diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java new file mode 100644 index 0000000000000000000000000000000000000000..aaf4eb467dde60e214ece8a2940769265911621f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.IOException; +import java.util.List; + +/** + * Factory for creating ORC tree readers. These tree readers can handle type promotions and type + * conversions. + */ +public class ConversionTreeReaderFactory extends TreeReaderFactory { + + // TODO: This is currently only a place holder for type conversions. 
+ + public static TreeReader createTreeReader(int columnId, + List<OrcProto.Type> types, + boolean[] included, + boolean skipCorrupt + ) throws IOException { + return TreeReaderFactory.createTreeReader(columnId, types, included, skipCorrupt); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java new file mode 100644 index 0000000000000000000000000000000000000000..e9a34cbabe55eecb4ad5733fcc54346d7fe8db76 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java @@ -0,0 +1,249 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +import com.google.common.collect.Lists; + +/** + * Factory to create ORC tree readers. It also compares file schema with schema specified on read + * to see if type promotions are possible. 
+ */ +public class RecordReaderFactory { + static final Log LOG = LogFactory.getLog(RecordReaderFactory.class); + private static final boolean isLogInfoEnabled = LOG.isInfoEnabled(); + + public static TreeReaderFactory.TreeReader createTreeReader(int colId, + Configuration conf, + List<OrcProto.Type> fileSchema, + boolean[] included, + boolean skipCorrupt) throws IOException { + List<OrcProto.Type> schemaOnRead = getSchemaOnRead(fileSchema.get(0).getSubtypesCount(), conf); + List<OrcProto.Type> schemaUsed = getMatchingSchema(fileSchema, schemaOnRead); + if (schemaUsed == null) { + return TreeReaderFactory.createTreeReader(colId, fileSchema, included, skipCorrupt); + } else { + return ConversionTreeReaderFactory.createTreeReader(colId, schemaUsed, included, skipCorrupt); + } + } + + private static List<OrcProto.Type> getMatchingSchema(List<OrcProto.Type> fileSchema, + List<OrcProto.Type> schemaOnRead) { + if (schemaOnRead == null) { + if (isLogInfoEnabled) { + LOG.info("Schema is not specified on read. Using file schema."); + } + return null; + } + + if (fileSchema.size() != schemaOnRead.size()) { + if (isLogInfoEnabled) { + LOG.info("Schema on read column count does not match file schema's column count." + + " Falling back to using file schema."); + } + return null; + } else { + List<OrcProto.Type> result = Lists.newArrayList(fileSchema); + // check type promotion. ORC can only support type promotions for integer types + // short -> int -> bigint as same integer readers are used for the above types. 
+ boolean canPromoteType = false; + for (int i = 0; i < fileSchema.size(); i++) { + OrcProto.Type fColType = fileSchema.get(i); + OrcProto.Type rColType = schemaOnRead.get(i); + if (!fColType.getKind().equals(rColType.getKind())) { + + if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) { + + if (rColType.getKind().equals(OrcProto.Type.Kind.INT) || + rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { + // type promotion possible, converting SHORT to INT/LONG requested type + result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build()); + canPromoteType = true; + } else { + canPromoteType = false; + } + + } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) { + + if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { + // type promotion possible, converting INT to LONG requested type + result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build()); + canPromoteType = true; + } else { + canPromoteType = false; + } + + } else { + canPromoteType = false; + } + } + } + + if (canPromoteType) { + if (isLogInfoEnabled) { + LOG.info("Integer type promotion happened in ORC record reader. Using promoted schema."); + } + return result; + } + } + + return null; + } + + private static List<OrcProto.Type> getSchemaOnRead(int numCols, Configuration conf) { + String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES); + final String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS); + if (columnTypeProperty == null || columnNameProperty == null) { + return null; + } + + ArrayList<String> columnNames = Lists.newArrayList(columnNameProperty.split(",")); + ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + StructTypeInfo structTypeInfo = new StructTypeInfo(); + // Column types from conf includes virtual and partition columns at the end. We consider only + // the actual columns in the file. 
+ structTypeInfo.setAllStructFieldNames(Lists.newArrayList(columnNames.subList(0, numCols))); + structTypeInfo.setAllStructFieldTypeInfos(Lists.newArrayList(fieldTypes.subList(0, numCols))); + ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(structTypeInfo); + return getOrcTypes(oi); + } + + private static List<OrcProto.Type> getOrcTypes(ObjectInspector inspector) { + List<OrcProto.Type> result = Lists.newArrayList(); + getOrcTypesImpl(result, inspector); + return result; + } + + private static void getOrcTypesImpl(List<OrcProto.Type> result, ObjectInspector inspector) { + OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); + switch (inspector.getCategory()) { + case PRIMITIVE: + switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) { + case BOOLEAN: + type.setKind(OrcProto.Type.Kind.BOOLEAN); + break; + case BYTE: + type.setKind(OrcProto.Type.Kind.BYTE); + break; + case SHORT: + type.setKind(OrcProto.Type.Kind.SHORT); + break; + case INT: + type.setKind(OrcProto.Type.Kind.INT); + break; + case LONG: + type.setKind(OrcProto.Type.Kind.LONG); + break; + case FLOAT: + type.setKind(OrcProto.Type.Kind.FLOAT); + break; + case DOUBLE: + type.setKind(OrcProto.Type.Kind.DOUBLE); + break; + case STRING: + type.setKind(OrcProto.Type.Kind.STRING); + break; + case CHAR: + // The char length needs to be written to file and should be available + // from the object inspector + CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) inspector) + .getTypeInfo(); + type.setKind(OrcProto.Type.Kind.CHAR); + type.setMaximumLength(charTypeInfo.getLength()); + break; + case VARCHAR: + // The varchar length needs to be written to file and should be available + // from the object inspector + VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) inspector) + .getTypeInfo(); + type.setKind(OrcProto.Type.Kind.VARCHAR); + type.setMaximumLength(typeInfo.getLength()); + break; + case BINARY: + type.setKind(OrcProto.Type.Kind.BINARY); + break; + case 
TIMESTAMP: + type.setKind(OrcProto.Type.Kind.TIMESTAMP); + break; + case DATE: + type.setKind(OrcProto.Type.Kind.DATE); + break; + case DECIMAL: + DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) ((PrimitiveObjectInspector) inspector) + .getTypeInfo(); + type.setKind(OrcProto.Type.Kind.DECIMAL); + type.setPrecision(decTypeInfo.precision()); + type.setScale(decTypeInfo.scale()); + break; + default: + throw new IllegalArgumentException("Unknown primitive category: " + + ((PrimitiveObjectInspector) inspector).getPrimitiveCategory()); + } + result.add(type.build()); + break; + case LIST: + type.setKind(OrcProto.Type.Kind.LIST); + result.add(type.build()); + getOrcTypesImpl(result, ((ListObjectInspector) inspector).getListElementObjectInspector()); + break; + case MAP: + type.setKind(OrcProto.Type.Kind.MAP); + result.add(type.build()); + getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapKeyObjectInspector()); + getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapValueObjectInspector()); + break; + case STRUCT: + type.setKind(OrcProto.Type.Kind.STRUCT); + result.add(type.build()); + for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) { + getOrcTypesImpl(result, field.getFieldObjectInspector()); + } + break; + case UNION: + type.setKind(OrcProto.Type.Kind.UNION); + result.add(type.build()); + for (ObjectInspector oi : ((UnionObjectInspector) inspector).getObjectInspectors()) { + getOrcTypesImpl(result, oi); + } + break; + default: + throw new IllegalArgumentException("Unknown category: " + inspector.getCategory()); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index a5a59439bc1e0f2616c02039c74b6d2d0f0a02c0..58e19cb7b546434cd9f2bbb52afd0dff2156f535 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ 
-199,7 +199,7 @@ protected RecordReaderImpl(List stripes, firstRow = skippedRows; totalRowCount = rows; boolean skipCorrupt = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA); - reader = TreeReaderFactory.createTreeReader(0, types, included, skipCorrupt); + reader = RecordReaderFactory.createTreeReader(0, conf, types, included, skipCorrupt); indexes = new OrcProto.RowIndex[types.size()]; bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()]; advanceToNextRow(reader, 0L, true); diff --git a/ql/src/test/queries/clientpositive/orc_int_type_promotion.q b/ql/src/test/queries/clientpositive/orc_int_type_promotion.q new file mode 100644 index 0000000000000000000000000000000000000000..4a805a0a8d91622b3bc2a3e23b38bdb5b308c0a6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_int_type_promotion.q @@ -0,0 +1,79 @@ +create table if not exists alltypes ( + bo boolean, + ti tinyint, + si smallint, + i int, + bi bigint, + f float, + d double, + de decimal(10,3), + ts timestamp, + da date, + s string, + c char(5), + vc varchar(5), + m map, + l array, + st struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile; + +create table if not exists alltypes_orc ( + bo boolean, + ti tinyint, + si smallint, + i int, + bi bigint, + f float, + d double, + de decimal(10,3), + ts timestamp, + da date, + s string, + c char(5), + vc varchar(5), + m map, + l array, + st struct +) stored as orc; + +load data local inpath '../../data/files/alltypes2.txt' overwrite into table alltypes; + +insert overwrite table alltypes_orc select * from alltypes; + +select * from alltypes_orc; + +alter table alltypes_orc change si si int; +select * from alltypes_orc; + +alter table alltypes_orc change si si bigint; +alter table alltypes_orc change i i bigint; +select * from alltypes_orc; + +alter table alltypes_orc change l l array; +select * from alltypes_orc; + +set 
hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +alter table alltypes_orc change si si smallint; +alter table alltypes_orc change i i int; + +explain select ti, si, i, bi from alltypes_orc; +select ti, si, i, bi from alltypes_orc; + +alter table alltypes_orc change si si int; +select ti, si, i, bi from alltypes_orc; + +alter table alltypes_orc change si si bigint; +alter table alltypes_orc change i i bigint; +select ti, si, i, bi from alltypes_orc; + +set hive.exec.dynamic.partition.mode=nonstrict; +create table src_part_orc (key int, value string) partitioned by (ds string) stored as orc; +insert overwrite table src_part_orc partition(ds) select key, value, ds from srcpart where ds is not null; + +select * from src_part_orc limit 10; + +alter table src_part_orc change key key bigint; +select * from src_part_orc limit 10; diff --git a/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out new file mode 100644 index 0000000000000000000000000000000000000000..d26dff22625fb310d9ebc2382b22a51acc2c9ae1 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out @@ -0,0 +1,377 @@ +PREHOOK: query: create table if not exists alltypes ( + bo boolean, + ti tinyint, + si smallint, + i int, + bi bigint, + f float, + d double, + de decimal(10,3), + ts timestamp, + da date, + s string, + c char(5), + vc varchar(5), + m map, + l array, + st struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes +POSTHOOK: query: create table if not exists alltypes ( + bo boolean, + ti tinyint, + si smallint, + i int, + bi bigint, + f float, + d double, + de decimal(10,3), + ts timestamp, + da date, + s string, + c char(5), + vc varchar(5), + m map, + l array, + st struct +) row format 
delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypes +PREHOOK: query: create table if not exists alltypes_orc ( + bo boolean, + ti tinyint, + si smallint, + i int, + bi bigint, + f float, + d double, + de decimal(10,3), + ts timestamp, + da date, + s string, + c char(5), + vc varchar(5), + m map, + l array, + st struct +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: create table if not exists alltypes_orc ( + bo boolean, + ti tinyint, + si smallint, + i int, + bi bigint, + f float, + d double, + de decimal(10,3), + ts timestamp, + da date, + s string, + c char(5), + vc varchar(5), + m map, + l array, + st struct +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: load data local inpath '../../data/files/alltypes2.txt' overwrite into table alltypes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alltypes +POSTHOOK: query: load data local inpath '../../data/files/alltypes2.txt' overwrite into table alltypes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alltypes +PREHOOK: query: insert overwrite table alltypes_orc select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: insert overwrite table alltypes_orc select * from alltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes +POSTHOOK: Output: default@alltypes_orc +POSTHOOK: Lineage: alltypes_orc.bi SIMPLE [(alltypes)alltypes.FieldSchema(name:bi, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo SIMPLE [(alltypes)alltypes.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: 
alltypes_orc.c SIMPLE [(alltypes)alltypes.FieldSchema(name:c, type:char(5), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d SIMPLE [(alltypes)alltypes.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da SIMPLE [(alltypes)alltypes.FieldSchema(name:da, type:date, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de SIMPLE [(alltypes)alltypes.FieldSchema(name:de, type:decimal(10,3), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f SIMPLE [(alltypes)alltypes.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i SIMPLE [(alltypes)alltypes.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l SIMPLE [(alltypes)alltypes.FieldSchema(name:l, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m SIMPLE [(alltypes)alltypes.FieldSchema(name:m, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s SIMPLE [(alltypes)alltypes.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si SIMPLE [(alltypes)alltypes.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st SIMPLE [(alltypes)alltypes.FieldSchema(name:st, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti SIMPLE [(alltypes)alltypes.FieldSchema(name:ti, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts SIMPLE [(alltypes)alltypes.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.vc SIMPLE [(alltypes)alltypes.FieldSchema(name:vc, type:varchar(5), comment:null), ] +PREHOOK: query: select * from alltypes_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +true 10 100 1000 10000 4.0 20.0 4.222 1969-12-31 15:59:58.174 1970-01-01 string hello hello {"k1":"v1","k2":"v2"} [100,200] {"c1":null,"c2":" 
\"foo\"}"} +false 20 200 2000 20000 8.0 40.0 2.222 1970-12-31 15:59:58.174 1971-01-01 abcd world world {"k3":"v3","k4":"v4"} [200,300] {"c1":null,"c2":" \"bar\"}"} +PREHOOK: query: alter table alltypes_orc change si si int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change si si int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: select * from alltypes_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +true 10 100 1000 10000 4.0 20.0 4.222 1969-12-31 15:59:58.174 1970-01-01 string hello hello {"k1":"v1","k2":"v2"} [100,200] {"c1":null,"c2":" \"foo\"}"} +false 20 200 2000 20000 8.0 40.0 2.222 1970-12-31 15:59:58.174 1971-01-01 abcd world world {"k3":"v3","k4":"v4"} [200,300] {"c1":null,"c2":" \"bar\"}"} +PREHOOK: query: alter table alltypes_orc change si si bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change si si bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: alter table alltypes_orc change i i bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change i i bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: select * from alltypes_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from alltypes_orc 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +true 10 100 1000 10000 4.0 20.0 4.222 1969-12-31 15:59:58.174 1970-01-01 string hello hello {"k1":"v1","k2":"v2"} [100,200] {"c1":null,"c2":" \"foo\"}"} +false 20 200 2000 20000 8.0 40.0 2.222 1970-12-31 15:59:58.174 1971-01-01 abcd world world {"k3":"v3","k4":"v4"} [200,300] {"c1":null,"c2":" \"bar\"}"} +PREHOOK: query: alter table alltypes_orc change l l array +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change l l array +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: select * from alltypes_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +true 10 100 1000 10000 4.0 20.0 4.222 1969-12-31 15:59:58.174 1970-01-01 string hello hello {"k1":"v1","k2":"v2"} [100,200] {"c1":null,"c2":" \"foo\"}"} +false 20 200 2000 20000 8.0 40.0 2.222 1970-12-31 15:59:58.174 1971-01-01 abcd world world {"k3":"v3","k4":"v4"} [200,300] {"c1":null,"c2":" \"bar\"}"} +PREHOOK: query: alter table alltypes_orc change si si smallint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change si si smallint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: alter table alltypes_orc change i i int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change i i int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: 
default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: explain select ti, si, i, bi from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select ti, si, i, bi from alltypes_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ti, si, i, bi from alltypes_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: select ti, si, i, bi from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +10 100 1000 10000 +20 200 2000 20000 +PREHOOK: query: alter table alltypes_orc change si si int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change si si int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: select ti, si, i, bi from alltypes_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### 
A masked pattern was here #### +POSTHOOK: query: select ti, si, i, bi from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +10 100 1000 10000 +20 200 2000 20000 +PREHOOK: query: alter table alltypes_orc change si si bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change si si bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: alter table alltypes_orc change i i bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc change i i bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: select ti, si, i, bi from alltypes_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: select ti, si, i, bi from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +10 100 1000 10000 +20 200 2000 20000 +PREHOOK: query: create table src_part_orc (key int, value string) partitioned by (ds string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_part_orc +POSTHOOK: query: create table src_part_orc (key int, value string) partitioned by (ds string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_part_orc +PREHOOK: query: insert overwrite table src_part_orc partition(ds) select key, value, ds from srcpart where ds is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: 
Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@src_part_orc +POSTHOOK: query: insert overwrite table src_part_orc partition(ds) select key, value, ds from srcpart where ds is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@src_part_orc@ds=2008-04-08 +POSTHOOK: Output: default@src_part_orc@ds=2008-04-09 +POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-08).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-09).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_part_orc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_part_orc +PREHOOK: Input: default@src_part_orc@ds=2008-04-08 +PREHOOK: Input: default@src_part_orc@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_part_orc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_part_orc +POSTHOOK: Input: default@src_part_orc@ds=2008-04-08 +POSTHOOK: Input: default@src_part_orc@ds=2008-04-09 +#### A masked pattern was here #### +238 val_238 2008-04-08 +86 val_86 2008-04-08 +311 val_311 2008-04-08 +27 val_27 2008-04-08 +165 val_165 2008-04-08 +409 val_409 2008-04-08 +255 val_255 2008-04-08 +278 val_278 2008-04-08 +98 val_98 2008-04-08 +484 val_484 2008-04-08 +PREHOOK: query: alter table 
src_part_orc change key key bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@src_part_orc +PREHOOK: Output: default@src_part_orc +POSTHOOK: query: alter table src_part_orc change key key bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@src_part_orc +POSTHOOK: Output: default@src_part_orc +PREHOOK: query: select * from src_part_orc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_part_orc +PREHOOK: Input: default@src_part_orc@ds=2008-04-08 +PREHOOK: Input: default@src_part_orc@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_part_orc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_part_orc +POSTHOOK: Input: default@src_part_orc@ds=2008-04-08 +POSTHOOK: Input: default@src_part_orc@ds=2008-04-09 +#### A masked pattern was here #### +238 val_238 2008-04-08 +86 val_86 2008-04-08 +311 val_311 2008-04-08 +27 val_27 2008-04-08 +165 val_165 2008-04-08 +409 val_409 2008-04-08 +255 val_255 2008-04-08 +278 val_278 2008-04-08 +98 val_98 2008-04-08 +484 val_484 2008-04-08