From df387b698470754071f33992d28f3176b920b63b Mon Sep 17 00:00:00 2001
From: vkorukanti
Date: Sun, 14 Sep 2014 21:24:03 -0700
Subject: [PATCH] HIVE-8099: IN operator for partition column fails when the
 partition column type is DATE

---
 .../apache/hadoop/hive/metastore/ObjectStore.java  |  6 ++-
 .../hive/metastore/PartitionExpressionProxy.java   |  7 ++-
 .../MockPartitionExpressionForMetastore.java       |  5 +-
 .../hive/ql/optimizer/ppr/PartExprEvalUtils.java   |  7 ++-
 .../ppr/PartitionExpressionForMetastore.java       |  6 ++-
 .../hive/ql/optimizer/ppr/PartitionPruner.java     | 37 +++++++++++---
 .../clientpositive/partition_type_in_plan.q        | 15 ++++++
 .../clientpositive/partition_type_in_plan.q.out    | 57 ++++++++++++++++++++++
 8 files changed, 125 insertions(+), 15 deletions(-)
 create mode 100644 ql/src/test/queries/clientpositive/partition_type_in_plan.q
 create mode 100644 ql/src/test/results/clientpositive/partition_type_in_plan.q.out

diff --git metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 2aa5d20..758f77c 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -132,6 +132,8 @@
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator;
 import org.apache.hadoop.hive.metastore.parser.FilterLexer;
 import org.apache.hadoop.hive.metastore.parser.FilterParser;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.util.StringUtils;
@@ -2136,14 +2138,16 @@ private boolean getPartitionNamesPrunedByExprNoTxn(Table table, byte[] expr,
       result.addAll(getPartitionNamesNoTxn(
           table.getDbName(), table.getTableName(), maxParts));
     List<String> columnNames = new ArrayList<String>();
+    List<PrimitiveTypeInfo> typeInfos = new ArrayList<PrimitiveTypeInfo>();
     for (FieldSchema fs : table.getPartitionKeys()) {
       columnNames.add(fs.getName());
+      typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
     }
     if (defaultPartName == null || defaultPartName.isEmpty()) {
       defaultPartName = HiveConf.getVar(getConf(), HiveConf.ConfVars.DEFAULTPARTITIONNAME);
     }
     return expressionProxy.filterPartitionsByExpr(
-        columnNames, expr, defaultPartName, result);
+        columnNames, typeInfos, expr, defaultPartName, result);
   }
 
   /**
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
index 0787775..5195481 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
@@ -21,6 +21,7 @@
 import java.util.List;
 
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 /**
  * The proxy interface that metastore uses to manipulate and apply
@@ -37,12 +38,14 @@
 
   /**
    * Filters the partition names via serialized Hive expression.
-   * @param columnNames Partition column names in the underlying table.
+   * @param partColumnNames Partition column names in the underlying table.
+   * @param partColumnTypeInfos Partition column types in the underlying table
    * @param expr Serialized expression.
    * @param defaultPartitionName Default partition name from job or server configuration.
    * @param partitionNames Partition names; the list is modified in place.
    * @return Whether there were any unknown partitions preserved in the name list.
    */
-  public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr,
+  public boolean filterPartitionsByExpr(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr,
       String defaultPartitionName, List<String> partitionNames) throws MetaException;
 }
diff --git metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
index ecbc8c8..0b36412 100644
--- metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
+++ metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
@@ -1,6 +1,7 @@
 package org.apache.hadoop.hive.metastore;
 
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 import java.util.List;
@@ -14,7 +15,9 @@ public String convertExprToFilter(byte[] expr) throws MetaException {
   }
 
   @Override
-  public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr, String defaultPartitionName, List<String> partitionNames) throws MetaException {
+  public boolean filterPartitionsByExpr(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr, String defaultPartitionName,
+      List<String> partitionNames) throws MetaException {
     return false;
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
index 6159c7d..dc5d2df 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 public class PartExprEvalUtils {
@@ -103,11 +104,13 @@ static synchronized public Object evalExprWithPart(ExprNodeDesc expr,
   }
 
   static synchronized public ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(
-      ExprNodeGenericFuncDesc expr, List<String> partNames) throws HiveException {
+      ExprNodeGenericFuncDesc expr, List<String> partNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
     // Create the row object
     List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
     for (int i = 0; i < partNames.size(); i++) {
-      partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+      partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+          partColumnTypeInfos.get(i)));
     }
     StructObjectInspector objectInspector = ObjectInspectorFactory
         .getStandardStructObjectInspector(partNames, partObjectInspectors);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
index d98b5c5..9ffa177 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
@@ -27,6 +27,7 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 /**
  * The basic implementation of PartitionExpressionProxy that uses ql package classes.
@@ -40,13 +41,14 @@ public String convertExprToFilter(byte[] exprBytes) throws MetaException {
   }
 
   @Override
-  public boolean filterPartitionsByExpr(List<String> columnNames, byte[] exprBytes,
+  public boolean filterPartitionsByExpr(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] exprBytes,
       String defaultPartitionName, List<String> partitionNames) throws MetaException {
     ExprNodeGenericFuncDesc expr = deserializeExpr(exprBytes);
     try {
       long startTime = System.nanoTime(), len = partitionNames.size();
       boolean result = PartitionPruner.prunePartitionNames(
-          columnNames, expr, defaultPartitionName, partitionNames);
+          partColumnNames, partColumnTypeInfos, expr, defaultPartitionName, partitionNames);
       double timeMs = (System.nanoTime() - startTime) / 1000000.0;
       LOG.debug("Pruning " + len + " partition names took " + timeMs + "ms");
       return result;
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 1796b7b..4b2a81a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -57,7 +57,9 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -420,9 +422,10 @@ static private boolean pruneBySequentialScan(Table tab, List partitio
     String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
     List<String> partCols = extractPartColNames(tab);
+    List<PrimitiveTypeInfo> partColTypeInfos = extractPartColTypes(tab);
     boolean hasUnknownPartitions = prunePartitionNames(
-        partCols, prunerExpr, defaultPartitionName, partNames);
+        partCols, partColTypeInfos, prunerExpr, defaultPartitionName, partNames);
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
 
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
@@ -442,19 +445,30 @@ static private boolean pruneBySequentialScan(Table tab, List partitio
     return partCols;
   }
 
+  private static List<PrimitiveTypeInfo> extractPartColTypes(Table tab) {
+    List<FieldSchema> pCols = tab.getPartCols();
+    List<PrimitiveTypeInfo> partColTypeInfos = new ArrayList<PrimitiveTypeInfo>(pCols.size());
+    for (FieldSchema pCol : pCols) {
+      partColTypeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(pCol.getType()));
+    }
+    return partColTypeInfos;
+  }
+
   /**
    * Prunes partition names to see if they match the prune expression.
-   * @param columnNames name of partition columns
+   * @param partColumnNames name of partition columns
+   * @param partColumnTypeInfos types of partition columns
    * @param prunerExpr The expression to match.
    * @param defaultPartitionName name of default partition
    * @param partNames Partition names to filter. The list is modified in place.
    * @return Whether the list has any partitions for which the expression may or may not match.
    */
-  public static boolean prunePartitionNames(List<String> columnNames, ExprNodeGenericFuncDesc prunerExpr,
+  public static boolean prunePartitionNames(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, ExprNodeGenericFuncDesc prunerExpr,
       String defaultPartitionName, List<String> partNames) throws HiveException, MetaException {
     // Prepare the expression to filter on the columns.
     ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> handle =
-        PartExprEvalUtils.prepareExpr(prunerExpr, columnNames);
+        PartExprEvalUtils.prepareExpr(prunerExpr, partColumnNames, partColumnTypeInfos);
 
     // Filter the name list. Removing elements one by one can be slow on e.g. ArrayList,
     // so let's create a new list and copy it if we don't have a linked list
@@ -462,8 +476,8 @@ public static boolean prunePartitionNames(List columnNames, ExprNodeGene
     List<String> partNamesSeq = inPlace ? partNames : new LinkedList<String>(partNames);
 
     // Array for the values to pass to evaluator.
-    ArrayList<String> values = new ArrayList<String>(columnNames.size());
-    for (int i = 0; i < columnNames.size(); ++i) {
+    ArrayList<String> values = new ArrayList<String>(partColumnNames.size());
+    for (int i = 0; i < partColumnNames.size(); ++i) {
       values.add(null);
     }
 
@@ -473,8 +487,17 @@ public static boolean prunePartitionNames(List columnNames, ExprNodeGene
       String partName = partIter.next();
       Warehouse.makeValsFromName(partName, values);
 
+      ArrayList<Object> convertedValues = new ArrayList<Object>(values.size());
+      for(int i=0; i