diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index a069394..55aea0e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -60,11 +60,13 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; @@ -940,15 +942,21 @@ public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) { } break; case STRUCT: - StructObjectInspector soi = (StructObjectInspector) oi; - - // add constant object overhead for struct - result += JavaDataModel.get().object(); - - // add constant struct field names references overhead - result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); - for (StructField field : soi.getAllStructFieldRefs()) { - result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector()); + if (oi instanceof StandardConstantStructObjectInspector) { + // constant map projection of known length + StandardConstantStructObjectInspector scsoi = (StandardConstantStructObjectInspector) oi; + result += getSizeOfStruct(scsoi); + } else { + StructObjectInspector soi = (StructObjectInspector) oi; + + // add constant object overhead for struct + result += JavaDataModel.get().object(); + + // add constant struct field names references overhead + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + for (StructField field : soi.getAllStructFieldRefs()) { + result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector()); + } } break; case UNION: @@ -1053,6 +1061,24 @@ public static long getSizeOfMap(StandardConstantMapObjectInspector scmoi) { return result; } + public static long getSizeOfStruct(StandardConstantStructObjectInspector soi) { + long result = 0; + // add constant object overhead for struct + result += JavaDataModel.get().object(); + + // add constant struct field names references overhead + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + List value = soi.getWritableConstantValue(); + List fields = soi.getAllStructFieldRefs(); + if (value == null || value.size() != fields.size()) { + return result; + } + for (int i = 0; i < fields.size(); i++) { + result += getWritableSize(fields.get(i).getFieldObjectInspector(), value.get(i)); + } + return result; + } + /** * Get size of primitive data types based on their respective writable object inspector * @param oi diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java index 38b1dc4..56ac3e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java @@ -32,9 +32,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.BooleanWritable; +import com.esotericsoftware.minlog.Log; + /** * GenericUDFIn * @@ -168,6 +171,14 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } break; } + case STRUCT: { + if (constantInSet.contains(((StructObjectInspector) compareOI).getStructFieldsDataAsList(conversionHelper + .convertIfNecessary(arguments[0].get(), argumentOIs[0])))) { + bw.set(true); + return bw; + } + break; + } default: throw new RuntimeException("Compare of unsupported constant type: " + compareOI.getCategory()); diff --git a/ql/src/test/queries/clientpositive/structin.q b/ql/src/test/queries/clientpositive/structin.q new file mode 100644 index 0000000..48b31f3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/structin.q @@ -0,0 +1,17 @@ +create table t11 (`id` string, `lineid` string); +set hive.cbo.enable=false; +set hive.tez.dynamic.partition.pruning=false; +set hive.vectorized.execution.enabled=true; + +explain select * from t11 where struct(`id`, `lineid`) +IN ( +struct('1234-1111-0074578664','3'), +struct('1234-1111-0074578695','1'), +struct('1234-1111-0074580704','1'), +struct('1234-1111-0074581619','2'), +struct('1234-1111-0074582745','1'), +struct('1234-1111-0074586625','1'), +struct('1234-1111-0074019112','1'), +struct('1234-1111-0074019610','1'), +struct('1234-1111-0074022106','1') +); diff --git a/ql/src/test/results/clientpositive/null_cast.q.out b/ql/src/test/results/clientpositive/null_cast.q.out index 810eacd..b5af69b 100644 --- a/ql/src/test/results/clientpositive/null_cast.q.out +++ b/ql/src/test/results/clientpositive/null_cast.q.out @@ -25,10 +25,10 @@ STAGE PLANS: Select Operator expressions: array(null,0) (type: array), array(null,array()) (type: array>), array(null,map()) (type: array>), array(null,struct(0)) (type: array>) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 340000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 340000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out new file mode 100644 index 0000000..e36fceb --- /dev/null +++ b/ql/src/test/results/clientpositive/structin.q.out @@ -0,0 +1,66 @@ +PREHOOK: query: create table t11 (`id` string, `lineid` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t11 +POSTHOOK: query: create table t11 (`id` string, `lineid` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t11 +PREHOOK: query: explain select * from t11 where struct(`id`, `lineid`) +IN ( +struct('1234-1111-0074578664','3'), +struct('1234-1111-0074578695','1'), +struct('1234-1111-0074580704','1'), +struct('1234-1111-0074581619','2'), +struct('1234-1111-0074582745','1'), +struct('1234-1111-0074586625','1'), +struct('1234-1111-0074019112','1'), +struct('1234-1111-0074019610','1'), +struct('1234-1111-0074022106','1') +) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t11 where struct(`id`, `lineid`) +IN ( +struct('1234-1111-0074578664','3'), +struct('1234-1111-0074578695','1'), +struct('1234-1111-0074580704','1'), +struct('1234-1111-0074581619','2'), +struct('1234-1111-0074582745','1'), +struct('1234-1111-0074586625','1'), +struct('1234-1111-0074019112','1'), +struct('1234-1111-0074019610','1'), +struct('1234-1111-0074022106','1') +) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (struct(id,lineid)) IN (struct('1234-1111-0074578664','3'), struct('1234-1111-0074578695','1'), struct('1234-1111-0074580704','1'), struct('1234-1111-0074581619','2'), struct('1234-1111-0074582745','1'), struct('1234-1111-0074586625','1'), struct('1234-1111-0074019112','1'), struct('1234-1111-0074019610','1'), struct('1234-1111-0074022106','1')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: id (type: string), lineid (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/udf_inline.q.out b/ql/src/test/results/clientpositive/udf_inline.q.out index 45bd463..7d372f3 100644 --- a/ql/src/test/results/clientpositive/udf_inline.q.out +++ b/ql/src/test/results/clientpositive/udf_inline.q.out @@ -33,13 +33,13 @@ STAGE PLANS: Select Operator expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array>) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE function name: inline Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT inline( diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index c35f4e9..97bb715 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -215,7 +215,6 @@ public static StandardConstantListObjectInspector getStandardConstantListObjectI return new StandardConstantListObjectInspector(listElementObjectInspector, constantValue); } - static ConcurrentHashMap, StandardMapObjectInspector> cachedStandardMapObjectInspector = new ConcurrentHashMap, StandardMapObjectInspector>(); @@ -297,6 +296,12 @@ public static StandardStructObjectInspector getStandardStructObjectInspector( return result; } + public static StandardConstantStructObjectInspector getStandardConstantStructObjectInspector( + List structFieldNames, + List structFieldObjectInspectors, List value) { + return new StandardConstantStructObjectInspector(structFieldNames, structFieldObjectInspectors, value); + } + static ConcurrentHashMap, UnionStructObjectInspector> cachedUnionStructObjectInspector = new ConcurrentHashMap, UnionStructObjectInspector>(); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 6ef9f5d..64dd512 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -1073,6 +1073,21 @@ public static ConstantObjectInspector getConstantObjectInspector(ObjectInspector ObjectInspectorCopyOption.WRITABLE ), (Map)writableValue); + case STRUCT: + StructObjectInspector soi = (StructObjectInspector) oi; + List fields = soi.getAllStructFieldRefs(); + List fieldNames = new ArrayList(fields.size()); + List fieldObjectInspectors = new ArrayList( + fields.size()); + for (StructField f : fields) { + fieldNames.add(f.getFieldName()); + fieldObjectInspectors.add(getStandardObjectInspector(f + .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE)); + } + return ObjectInspectorFactory.getStandardConstantStructObjectInspector( + fieldNames, + fieldObjectInspectors, + (List)writableValue); default: throw new IllegalArgumentException( writableOI.getCategory() + " not yet supported for constant OI"); @@ -1088,6 +1103,7 @@ public static boolean supportsConstantObjectInspector(ObjectInspector oi) { case PRIMITIVE: case LIST: case MAP: + case STRUCT: return true; default: return false; diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardConstantStructObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardConstantStructObjectInspector.java new file mode 100644 index 0000000..8c47c0f --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardConstantStructObjectInspector.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.List; + +/** + * A StandardStructObjectInspector which also implements the + * ConstantObjectInspector interface. + * + * Always use the ObjectInspectorFactory to create new ObjectInspector objects, + * instead of directly creating an instance of this class. + */ +public class StandardConstantStructObjectInspector extends StandardStructObjectInspector + implements ConstantObjectInspector { + + private List value; + + protected StandardConstantStructObjectInspector() { + super(); + } + /** + * Call ObjectInspectorFactory.getStandardListObjectInspector instead. + */ + protected StandardConstantStructObjectInspector(List structFieldNames, + List structFieldObjectInspectors, List value) { + super(structFieldNames, structFieldObjectInspectors); + this.value = value; + } + + @Override + public List getWritableConstantValue() { + return value; + } +}