diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 2f36f04..583712b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer; +import com.google.common.collect.Interner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -39,8 +40,6 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.OperatorUtils; -import org.apache.hadoop.hive.ql.exec.OrcFileMergeOperator; -import org.apache.hadoop.hive.ql.exec.RCFileMergeOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; @@ -99,7 +98,6 @@ import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.stats.StatsFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputFormat; import java.io.Serializable; @@ -873,6 +871,30 @@ public static void setKeyAndValueDescForTaskTree(Task ta } } + public static void internTableDesc(Task task, Interner interner) { + + if (task instanceof ConditionalTask) { + for (Task tsk : ((ConditionalTask) task).getListTasks()) { + internTableDesc(tsk, interner); + } + } else if (task instanceof ExecDriver) { + MapredWork work = (MapredWork) task.getWork(); + work.getMapWork().internTable(interner); + } else if (task != null && (task.getWork() instanceof TezWork)) { + TezWork work = (TezWork)task.getWork(); + for (BaseWork w : work.getAllWorkUnsorted()) { + if (w instanceof MapWork) { + ((MapWork)w).internTable(interner); + } + } + } + if (task.getNumChild() > 0) { + for (Task childTask : task.getChildTasks()) { + internTableDesc(childTask, interner); + } + } + } + /** * create a new plan and return. * diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 0f714b5..2097043 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -26,6 +26,8 @@ import java.util.List; import java.util.Set; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -278,6 +280,11 @@ public void compile(final ParseContext pCtx, final List interner = Interners.newWeakInterner(); + for (Task rootTask : rootTasks) { + GenMapRedUtils.internTableDesc(rootTask, interner); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 15a97ab..6e4068b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -30,6 +30,7 @@ import java.util.Map.Entry; import java.util.Set; +import com.google.common.collect.Interner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -195,6 +196,19 @@ public void deriveExplainAttributes() { } } + public void internTable(Interner interner) { + if (aliasToPartnInfo != null) { + for (PartitionDesc part : aliasToPartnInfo.values()) { + part.intern(interner); + } + } + if (pathToPartitionInfo != null) { + for (PartitionDesc part : pathToPartitionInfo.values()) { + part.intern(interner); + } + } + } + /** * @return the aliasToPartnInfo */ diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 1149bda..10c38d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -48,12 +48,10 @@ public class PartitionDesc implements Serializable, Cloneable { static { - TABLE_INTERNER = Interners.newWeakInterner(); STRING_INTERNER = Interners.newWeakInterner(); CLASS_INTERNER = Interners.newWeakInterner(); } - private static final Interner TABLE_INTERNER; private static final Interner STRING_INTERNER; private static final Interner> CLASS_INTERNER; @@ -73,12 +71,12 @@ public PartitionDesc() { } public PartitionDesc(final TableDesc table, final LinkedHashMap partSpec) { - setTableDesc(table); + this.tableDesc = table; this.partSpec = partSpec; } public PartitionDesc(final Partition part) throws HiveException { - setTableDesc(Utilities.getTableDesc(part.getTable())); + this.tableDesc = Utilities.getTableDesc(part.getTable()); setProperties(part.getMetadataFromPartitionSchema()); partSpec = part.getSpec(); setInputFileFormatClass(part.getInputFormatClass()); @@ -86,7 +84,7 @@ public PartitionDesc(final Partition part) throws HiveException { } public PartitionDesc(final Partition part,final TableDesc tblDesc) throws HiveException { - setTableDesc(tblDesc); + this.tableDesc = tblDesc; setProperties(part.getSchemaFromTableSchema(tblDesc.getProperties())); // each partition maintains a large properties partSpec = part.getSpec(); setOutputFileFormatClass(part.getInputFormatClass()); @@ -99,7 +97,7 @@ public TableDesc getTableDesc() { } public void setTableDesc(TableDesc tableDesc) { - this.tableDesc = TABLE_INTERNER.intern(tableDesc); + this.tableDesc = tableDesc; } @Explain(displayName = "partition values") @@ -266,4 +264,8 @@ public void deriveBaseFileName(String path) { baseFileName = path; } } + + public void intern(Interner interner) { + this.tableDesc = interner.intern(tableDesc); + } } diff --git ql/src/test/results/compiler/plan/input1.q.xml ql/src/test/results/compiler/plan/input1.q.xml index 239ebbc..8b5d4fc 100755 --- ql/src/test/results/compiler/plan/input1.q.xml +++ ql/src/test/results/compiler/plan/input1.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/input2.q.xml ql/src/test/results/compiler/plan/input2.q.xml index 7f84e15..f106d80 100755 --- ql/src/test/results/compiler/plan/input2.q.xml +++ ql/src/test/results/compiler/plan/input2.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/input3.q.xml ql/src/test/results/compiler/plan/input3.q.xml index 35ad155..0152d46 100755 --- ql/src/test/results/compiler/plan/input3.q.xml +++ ql/src/test/results/compiler/plan/input3.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + @@ -881,7 +822,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -979,66 +920,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest2 - - - columns.types - string:string - - - serialization.ddl - struct dest2 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + @@ -1440,7 +1322,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -1547,70 +1429,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest3 - - - columns.types - string:string - - - serialization.ddl - struct dest3 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - partition_columns.types - string:string - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/input6.q.xml ql/src/test/results/compiler/plan/input6.q.xml index 185844b..5a459b6 100644 --- ql/src/test/results/compiler/plan/input6.q.xml +++ ql/src/test/results/compiler/plan/input6.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/input7.q.xml ql/src/test/results/compiler/plan/input7.q.xml index a160585..676a72e 100644 --- ql/src/test/results/compiler/plan/input7.q.xml +++ ql/src/test/results/compiler/plan/input7.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/input9.q.xml ql/src/test/results/compiler/plan/input9.q.xml index 0d35af7..3b0c93b 100644 --- ql/src/test/results/compiler/plan/input9.q.xml +++ ql/src/test/results/compiler/plan/input9.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/sample2.q.xml ql/src/test/results/compiler/plan/sample2.q.xml index 9f8e559..1b3a948 100644 --- ql/src/test/results/compiler/plan/sample2.q.xml +++ ql/src/test/results/compiler/plan/sample2.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/sample3.q.xml ql/src/test/results/compiler/plan/sample3.q.xml index ebba8a3..28e3208 100644 --- ql/src/test/results/compiler/plan/sample3.q.xml +++ ql/src/test/results/compiler/plan/sample3.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/sample4.q.xml ql/src/test/results/compiler/plan/sample4.q.xml index 9f8e559..1b3a948 100644 --- ql/src/test/results/compiler/plan/sample4.q.xml +++ ql/src/test/results/compiler/plan/sample4.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/sample5.q.xml ql/src/test/results/compiler/plan/sample5.q.xml index e5eaa8d..3cfd796 100644 --- ql/src/test/results/compiler/plan/sample5.q.xml +++ ql/src/test/results/compiler/plan/sample5.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/sample6.q.xml ql/src/test/results/compiler/plan/sample6.q.xml index 9931f41..a4fee5d 100644 --- ql/src/test/results/compiler/plan/sample6.q.xml +++ ql/src/test/results/compiler/plan/sample6.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/sample7.q.xml ql/src/test/results/compiler/plan/sample7.q.xml index 42eb9cd..a5fbe9d 100644 --- ql/src/test/results/compiler/plan/sample7.q.xml +++ ql/src/test/results/compiler/plan/sample7.q.xml @@ -334,7 +334,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -432,66 +432,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - columns.comments - defaultdefault - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - location - #### A masked pattern was here #### - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - + diff --git ql/src/test/results/compiler/plan/union.q.xml ql/src/test/results/compiler/plan/union.q.xml index 2808b05..e09bee6 100644 --- ql/src/test/results/compiler/plan/union.q.xml +++ ql/src/test/results/compiler/plan/union.q.xml @@ -235,7 +235,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -449,7 +449,7 @@ - + org.apache.hadoop.mapred.TextInputFormat @@ -593,7 +593,7 @@ - + @@ -640,34 +640,7 @@ true - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - string:string - - - - + 1 @@ -1634,7 +1607,7 @@ - +