diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index eb8c03f595e73e6ff80d6954fca56651b7e60419..11055530613571c4ebd82743efdb1bf1d7c3ba2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -18,14 +18,15 @@ package org.apache.hadoop.hive.ql.io.sarg; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; + import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -48,12 +49,12 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import java.sql.Date; -import java.sql.Timestamp; -import java.util.List; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; public class ConvertAstToSearchArg { private static final Log LOG = LogFactory.getLog(ConvertAstToSearchArg.class); @@ -133,7 +134,8 @@ private static String getColumnName(ExprNodeGenericFuncDesc expr, } private static Object boxLiteral(ExprNodeConstantDesc constantDesc, - PredicateLeaf.Type type) { + PredicateLeaf.Type type, + TypeInfo columnType) { Object lit = constantDesc.getValue(); if (lit == null) { return null; @@ -144,8 +146,14 @@ private static Object boxLiteral(ExprNodeConstantDesc constantDesc, case LONG: return ((Number) lit).longValue(); case STRING: + // chars are stored as is in ORC without stripping white spaces. For PPD to work correctly, + // expand the right hand side literal to match the length of char column type. + if (columnType instanceof CharTypeInfo) { + return HiveChar.getPaddedValue(lit.toString(), ((CharTypeInfo) columnType).getLength()); + } + if (lit instanceof HiveChar) { - lit = ((HiveChar) lit).getPaddedValue(); + return ((HiveChar) lit).getPaddedValue(); } else if (lit instanceof String) { return lit; } else { @@ -182,6 +190,7 @@ private static Object boxLiteral(ExprNodeConstantDesc constantDesc, private static Object findLiteral(ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type) { List children = expr.getChildren(); + TypeInfo columnType = null; if (children.size() != 2) { return null; } @@ -191,7 +200,9 @@ private static Object findLiteral(ExprNodeGenericFuncDesc expr, if (result != null) { return null; } - result = boxLiteral((ExprNodeConstantDesc) child, type); + result = boxLiteral((ExprNodeConstantDesc) child, type, columnType); + } else if (child instanceof ExprNodeColumnDesc) { + columnType = child.getTypeInfo(); } } return result; @@ -210,7 +221,7 @@ private static Object getLiteral(ExprNodeGenericFuncDesc expr, List children = expr.getChildren(); Object child = children.get(position); if (child instanceof ExprNodeConstantDesc) { - return boxLiteral((ExprNodeConstantDesc) child, type); + return boxLiteral((ExprNodeConstantDesc) child, type, null); } return null; } @@ -225,7 +236,7 @@ private static Object getLiteral(ExprNodeGenericFuncDesc expr, int posn = 0; for(ExprNodeDesc child: children.subList(start, children.size())) { if (child instanceof ExprNodeConstantDesc) { - result[posn++] = boxLiteral((ExprNodeConstantDesc) child, type); + result[posn++] = boxLiteral((ExprNodeConstantDesc) child, type, null); } else { // if we get some non-literals, we need to punt return null; diff --git a/ql/src/test/queries/clientpositive/orc_ppd_char.q b/ql/src/test/queries/clientpositive/orc_ppd_char.q index 1f5f54ae19ee8035505d5aaf264fded6c82b7514..00bafe0c5a108028b0ddb1bdb410d3a72dd37d99 100644 --- a/ql/src/test/queries/clientpositive/orc_ppd_char.q +++ b/ql/src/test/queries/clientpositive/orc_ppd_char.q @@ -74,3 +74,9 @@ select sum(hash(*)) from newtypesorc where c between "carrot" and "carrot1"; set hive.optimize.index.filter=true; select sum(hash(*)) from newtypesorc where c between "carrot" and "carrot1"; +create table orc_test( col1 string, col2 char(10)) stored as orc tblproperties ("orc.compress"="NONE"); +insert into orc_test values ('val1', '1'); + +select * from orc_test where col2='1'; + +select * from orc_test where col2=cast('1' as char(5)); diff --git a/ql/src/test/results/clientpositive/orc_ppd_char.q.out b/ql/src/test/results/clientpositive/orc_ppd_char.q.out index 79838c1d5b77287044ea0b97eb7fe65111da3953..1328aab68ff1b0c7cfdd902a8ee4160af9c5728e 100644 --- a/ql/src/test/results/clientpositive/orc_ppd_char.q.out +++ b/ql/src/test/results/clientpositive/orc_ppd_char.q.out @@ -218,3 +218,39 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@newtypesorc #### A masked pattern was here #### NULL +PREHOOK: query: create table orc_test( col1 string, col2 char(10)) stored as orc tblproperties ("orc.compress"="NONE") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_test +POSTHOOK: query: create table orc_test( col1 string, col2 char(10)) stored as orc tblproperties ("orc.compress"="NONE") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_test +PREHOOK: query: insert into orc_test values ('val1', '1') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@orc_test +POSTHOOK: query: insert into orc_test values ('val1', '1') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@orc_test +POSTHOOK: Lineage: orc_test.col1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: orc_test.col2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select * from orc_test where col2='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_test where col2='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test +#### A masked pattern was here #### +val1 1 +PREHOOK: query: select * from orc_test where col2=cast('1' as char(5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_test where col2=cast('1' as char(5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test +#### A masked pattern was here #### +val1 1