commit 0b00d88b157d758ce0f341d2f59f1b3844a1d707 Author: Misha Dmitriev Date: Thu Mar 9 17:49:39 2017 -0800 HIVE-16166: Intern strings in a number of places, getting their overhead from 15% down to 6% diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java index e3da7f01cdaeec3b8ae685ff57c8c25ec01c6ece..bb8dcbb6ac3ab4ca65e478ffe02a76ff077f8a2d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java @@ -20,6 +20,7 @@ import java.io.Serializable; +import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -96,7 +97,7 @@ public ColumnInfo(String internalName, ObjectInspector objectInspector, this.tabAlias = tabAlias; this.isVirtualCol = isVirtualCol; this.isHiddenVirtualCol = isHiddenVirtualCol; - this.typeName = getType().getTypeName(); + setTypeName(getType().getTypeName()); } public ColumnInfo(ColumnInfo columnInfo) { @@ -114,7 +115,7 @@ public String getTypeName() { } public void setTypeName(String typeName) { - this.typeName = typeName; + this.typeName = StringInternUtils.internIfNotNull(typeName); } public TypeInfo getType() { @@ -160,7 +161,7 @@ public String toString() { } public void setAlias(String col_alias) { - alias = col_alias; + alias = StringInternUtils.internIfNotNull(col_alias); } public String getAlias() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java index 2806c547b16a6b643415e68b7f810e7afe79c543..73054361f8840e4627684969e83cf6ad0ee47af8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageInfo.java @@ -27,6 +27,7 @@ import org.apache.commons.collections.SetUtils; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; @@ -403,7 +404,7 @@ public String getExpr() { * @param expr the expr to set */ public void setExpr(String expr) { - this.expr = expr; + this.expr = StringInternUtils.internIfNotNull(expr); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java index a5221a2baf072408129b3c402a5486cf1ebc0248..9bb3df4a5c8dc68bbe1ffdb1e0b24204fbb0823e 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -67,7 +68,7 @@ public ExprNodeConstantDesc() { public ExprNodeConstantDesc(TypeInfo typeInfo, Object value) { super(typeInfo); - this.value = value; + setValue(value); } public ExprNodeConstantDesc(Object value) { @@ -77,6 +78,9 @@ public ExprNodeConstantDesc(Object value) { public void setValue(Object value) { // Kryo setter + if (value instanceof String) { + value = StringInternUtils.internIfNotNull((String) value); + } this.value = value; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 63dc94cb629de47d5b813d1fb964373832acf249..117aa1487dbfbce45d5df1b13d69657e442efa78 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; @@ -67,7 +68,7 @@ public TableDesc( this.inputFileFormatClass = inputFormatClass; outputFileFormatClass = HiveFileFormatUtils .getOutputFormatSubstitute(outputFormatClass); - this.properties = properties; + setProperties(properties); } public Class getDeserializerClass() { @@ -129,6 +130,7 @@ public Map getPropertiesExplain() { } public void setProperties(final Properties properties) { + StringInternUtils.internValuesInMap((Map) properties); this.properties = properties; } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java index e5f2c5e72b9dd191d7c8627c5df6fba23d53b084..5467d8a84f56edb6806a19baa640f27e011f8ef5 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java @@ -24,6 +24,7 @@ import java.util.Properties; import org.apache.avro.Schema; +import org.apache.hadoop.hive.common.StringInternUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -106,7 +107,8 @@ public void initialize(Configuration configuration, Properties properties) throw schema = determineSchemaOrReturnErrorSchema(configuration, properties); } else { // Get column names and sort order - columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); + columnNames = StringInternUtils.internStringsInList( + Arrays.asList(columnNameProperty.split(columnNameDelimiter))); columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); schema = getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty); @@ -127,7 +129,7 @@ public void initialize(Configuration configuration, Properties properties) throw badSchema = schema.equals(SchemaResolutionProblem.SIGNAL_BAD_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(schema); - this.columnNames = aoig.getColumnNames(); + this.columnNames = StringInternUtils.internStringsInList(aoig.getColumnNames()); this.columnTypes = aoig.getColumnTypes(); this.oi = aoig.getObjectInspector(); } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index 2b3fded503c1a3ab0ecfca25133f69553d72dcfa..cb63d59e18f7881f1197669faf7151193a213f76 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -29,6 +29,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.thrift.TUnion; @@ -229,8 +230,8 @@ private static ObjectInspector getReflectionObjectInspectorNoCache(Type t, } - static ConcurrentHashMap cachedStandardListObjectInspector = - new ConcurrentHashMap(); + static ConcurrentHashMap + cachedStandardListObjectInspector = new ConcurrentHashMap(); public static StandardListObjectInspector getStandardListObjectInspector( ObjectInspector listElementObjectInspector) { @@ -316,13 +317,15 @@ public static StandardStructObjectInspector getStandardStructObjectInspector( List structFieldObjectInspectors, List structComments) { ArrayList> signature = new ArrayList>(3); + StringInternUtils.internStringsInList(structFieldNames); signature.add(structFieldNames); signature.add(structFieldObjectInspectors); - if(structComments != null) { + if (structComments != null) { + StringInternUtils.internStringsInList(structComments); signature.add(structComments); } StandardStructObjectInspector result = cachedStandardStructObjectInspector.get(signature); - if(result == null) { + if (result == null) { result = new StandardStructObjectInspector(structFieldNames, structFieldObjectInspectors, structComments); StandardStructObjectInspector prev = cachedStandardStructObjectInspector.putIfAbsent(signature, result); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java index d2e8823e69aac6f12d7e818c0577ea6277705be2..36a500790ad1a24c30f2c686798614db109084a0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java @@ -55,7 +55,7 @@ protected MyField() { public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) { this.fieldID = fieldID; - this.fieldName = fieldName.toLowerCase(); + this.fieldName = fieldName.toLowerCase().intern(); this.fieldObjectInspector = fieldObjectInspector; }