Index: ql/src/test/results/clientnegative/udf_in.q.out
===================================================================
--- ql/src/test/results/clientnegative/udf_in.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/udf_in.q.out	(revision 0)
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 1:9 Wrong Arguments 3: The arguments for IN should be the same type! Types are: {int IN (array<int>)}
Index: ql/src/test/results/clientpositive/show_functions.q.out
===================================================================
--- ql/src/test/results/clientpositive/show_functions.q.out	(revision 6233)
+++ ql/src/test/results/clientpositive/show_functions.q.out	(working copy)
@@ -57,6 +57,7 @@
 hex
 hour
 if
+in
 index
 instr
 int
Index: ql/src/test/results/clientpositive/udf_in.q.out
===================================================================
--- ql/src/test/results/clientpositive/udf_in.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/udf_in.q.out	(revision 0)
@@ -0,0 +1,40 @@
+PREHOOK: query: SELECT 1 IN (1, 2, 3),
+  4 IN (1, 2, 3),
+  array(1,2,3) IN (array(1,2,3)),
+  "bee" IN("aee", "bee", "cee", 1),
+  "dee" IN("aee", "bee", "cee"),
+  1 = 1 IN(true, false),
+  true IN (true, false) = true,
+  1 IN (1, 2, 3) OR false IN(false),
+  NULL IN (1, 2, 3),
+  4 IN (1, 2, 3, NULL),
+  (1+3) IN (5, 6, (1+2) + 1) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/task2/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-05-06_18-58-01_015_8699021825803506871/10000
+POSTHOOK: query: SELECT 1 IN (1, 2, 3),
+  4 IN (1, 2, 3),
+  array(1,2,3) IN (array(1,2,3)),
+  "bee" IN("aee", "bee", "cee", 1),
+  "dee" IN("aee", "bee", "cee"),
+  1 = 1 IN(true, false),
+  true IN (true, false) = true,
+  1 IN (1, 2, 3) OR false IN(false),
+  NULL IN (1, 2, 3),
+  4 IN (1, 2, 3, NULL),
+  (1+3) IN (5, 6, (1+2) + 1) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/task2/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-05-06_18-58-01_015_8699021825803506871/10000
+true	false	true	true	false	true	true	true	NULL	NULL	true
+PREHOOK: query: SELECT key FROM src WHERE key IN ("238", 86)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/task2/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-05-06_18-58-05_442_6167022487053863872/10000
+POSTHOOK: query: SELECT key FROM src WHERE key IN ("238", 86)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/task2/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-05-06_18-58-05_442_6167022487053863872/10000
+238
+86
+238
Index: ql/src/test/queries/clientnegative/udf_in.q
===================================================================
--- ql/src/test/queries/clientnegative/udf_in.q	(revision 0)
+++ ql/src/test/queries/clientnegative/udf_in.q	(revision 0)
@@ -0,0 +1 @@
+SELECT 3 IN (array(1,2,3)) FROM src;
\ No newline at end of file
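The expected results in udf_in.q.out above encode the SQL-standard three-valued logic that this patch implements: NULL IN (1, 2, 3) yields NULL because the left-hand side is NULL, and 4 IN (1, 2, 3, NULL) yields NULL because nothing matched and the list contains a NULL. A minimal standalone Java sketch of that rule (the class and helper names are hypothetical, not part of this patch):

    public class InSemanticsSketch {
      // Boolean.TRUE on a match, Boolean.FALSE on a definite non-match,
      // and null when the answer is unknown under three-valued logic.
      static Boolean inWithNullSemantics(Object lhs, Object... list) {
        if (lhs == null) {
          return null;                         // NULL IN (...) -> NULL
        }
        boolean sawNull = false;
        for (Object o : list) {
          if (o == null) {
            sawNull = true;                    // remember NULLs in the list
          } else if (lhs.equals(o)) {
            return Boolean.TRUE;               // any match decides immediately
          }
        }
        return sawNull ? null : Boolean.FALSE; // no match + NULL in list -> NULL
      }

      public static void main(String[] args) {
        System.out.println(inWithNullSemantics(1, 1, 2, 3));       // true
        System.out.println(inWithNullSemantics(4, 1, 2, 3, null)); // null
      }
    }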
Index: ql/src/test/queries/clientpositive/udf_in.q
===================================================================
--- ql/src/test/queries/clientpositive/udf_in.q	(revision 0)
+++ ql/src/test/queries/clientpositive/udf_in.q	(revision 0)
@@ -0,0 +1,13 @@
+SELECT 1 IN (1, 2, 3),
+  4 IN (1, 2, 3),
+  array(1,2,3) IN (array(1,2,3)),
+  "bee" IN("aee", "bee", "cee", 1),
+  "dee" IN("aee", "bee", "cee"),
+  1 = 1 IN(true, false),
+  true IN (true, false) = true,
+  1 IN (1, 2, 3) OR false IN(false),
+  NULL IN (1, 2, 3),
+  4 IN (1, 2, 3, NULL),
+  (1+3) IN (5, 6, (1+2) + 1) FROM src LIMIT 1;
+
+SELECT key FROM src WHERE key IN ("238", 86);
\ No newline at end of file
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java	(revision 6233)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java	(working copy)
@@ -145,6 +145,7 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFField;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate;
@@ -304,6 +305,7 @@
     registerGenericUDF("isnotnull", GenericUDFOPNotNull.class);
 
     registerGenericUDF("if", GenericUDFIf.class);
+    registerGenericUDF("in", GenericUDFIn.class);
 
     // Aliases for Java Class Names
     // These are used in getImplicitConvertUDFMethod
@@ -408,7 +410,7 @@
       Class<? extends GenericUDF> genericUDFClass) {
     if (GenericUDF.class.isAssignableFrom(genericUDFClass)) {
       FunctionInfo fI = new FunctionInfo(isNative, functionName,
-          (GenericUDF) ReflectionUtils.newInstance(genericUDFClass, null));
+          ReflectionUtils.newInstance(genericUDFClass, null));
       mFunctions.put(functionName.toLowerCase(), fI);
     } else {
       throw new RuntimeException("Registering GenericUDF Class "
@@ -430,7 +432,7 @@
       Class<? extends GenericUDTF> genericUDTFClass) {
     if (GenericUDTF.class.isAssignableFrom(genericUDTFClass)) {
       FunctionInfo fI = new FunctionInfo(isNative, functionName,
-          (GenericUDTF) ReflectionUtils.newInstance(genericUDTFClass, null));
+          ReflectionUtils.newInstance(genericUDTFClass, null));
       mFunctions.put(functionName.toLowerCase(), fI);
     } else {
       throw new RuntimeException("Registering GenericUDTF Class "
@@ -669,7 +671,7 @@
       Class<? extends UDAF> udafClass) {
     mFunctions.put(functionName.toLowerCase(), new FunctionInfo(isNative,
         functionName.toLowerCase(), new GenericUDAFBridge(
-        (UDAF) ReflectionUtils.newInstance(udafClass, null))));
+        ReflectionUtils.newInstance(udafClass, null))));
   }
 
   public static void unregisterTemporaryUDF(String functionName) throws HiveException {
@@ -887,7 +889,7 @@
           bridge.getUdfClass());
     }
 
-    return (GenericUDF) ReflectionUtils
+    return ReflectionUtils
         .newInstance(genericUDF.getClass(), null);
   }
 
@@ -898,7 +900,7 @@
     if (null == genericUDTF) {
       return null;
     }
-    return (GenericUDTF) ReflectionUtils.newInstance(genericUDTF.getClass(),
+    return ReflectionUtils.newInstance(genericUDTF.getClass(),
         null);
   }
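With the registration above in place, "in" resolves by name like any other generic UDF, and the redundant casts could go because ReflectionUtils.newInstance is generic over the class it instantiates. A short lookup sketch (standalone; the class name is hypothetical, and it assumes the registry's existing getFunctionInfo/getGenericUDF accessors):

    import org.apache.hadoop.hive.ql.exec.FunctionInfo;
    import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

    public class InLookupSketch {
      public static void main(String[] args) {
        // Lookup is case-insensitive: the registry keys on the lower-cased name.
        FunctionInfo info = FunctionRegistry.getFunctionInfo("IN");
        GenericUDF udf = info.getGenericUDF(); // expected: a GenericUDFIn
        System.out.println(udf.getClass().getName());
      }
    }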
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g	(revision 6233)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g	(working copy)
@@ -1166,7 +1166,6 @@
     (expression (COMMA expression)*)?
     RPAREN -> {$dist == null}? ^(TOK_FUNCTION functionName (expression+)?)
                        -> ^(TOK_FUNCTIONDI functionName (expression+)?)
-
     ;
 
 functionName
@@ -1336,10 +1335,19 @@
 
 precedenceEqualExpression
     :
-    precedenceBitwiseOrExpression (precedenceEqualOperator^ precedenceBitwiseOrExpression)*
+    precedenceBitwiseOrExpression ( (precedenceEqualOperator^ precedenceBitwiseOrExpression) | (inOperator^ expressions) )*
    ;
 
+inOperator
+    :
+    KW_IN -> ^(TOK_FUNCTION KW_IN)
+    ;
+
+expressions
+    :
+    LPAREN expression (COMMA expression)* RPAREN -> expression*
+    ;
+
 precedenceNotOperator
     :
     KW_NOT
@@ -1432,6 +1440,7 @@
     | BITWISEXOR
     | KW_RLIKE
     | KW_REGEXP
+    | KW_IN
     ;
 
 descFuncNames
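The grammar change rewrites x IN (a, b) into the same TOK_FUNCTION tree shape used for ordinary function calls, with the KW_IN token as the function name and the left-hand expression as the first argument, so no special casing is needed downstream. A quick way to eyeball the result (a sketch; the class name is hypothetical, and it assumes only the existing ParseDriver API):

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.ParseDriver;

    public class InParseSketch {
      public static void main(String[] args) throws Exception {
        ASTNode tree = new ParseDriver()
            .parse("SELECT key FROM src WHERE key IN ('238', 86)");
        // The WHERE clause should contain a subtree roughly like:
        //   (TOK_FUNCTION IN (TOK_TABLE_OR_COL key) '238' 86)
        System.out.println(tree.toStringTree());
      }
    }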
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 6233)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -31,9 +31,9 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.Map.Entry;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -88,7 +88,6 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
@@ -98,6 +97,7 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
+import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -117,7 +117,6 @@
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.plan.ForwardDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
@@ -138,11 +137,12 @@
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.UDTFDesc;
 import org.apache.hadoop.hive.ql.plan.UnionDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -150,9 +150,9 @@
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -6214,7 +6214,9 @@
         .put(new RuleRegExp("R3", HiveParser.Identifier + "%|"
         + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|"
         + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|"
-        + HiveParser.KW_WHEN + "%"), TypeCheckProcFactory
+        + HiveParser.KW_WHEN + "%|" + HiveParser.KW_IN + "%|"
+        + HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|"
+        + HiveParser.KW_STRUCT + "%"), TypeCheckProcFactory
         .getStrExprProcessor());
     opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|"
         + HiveParser.KW_FALSE + "%"), TypeCheckProcFactory
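Adding KW_IN (and the ARRAY/MAP/STRUCT constructors) to rule R3 routes those keyword tokens to the string-expression processor, so a bare IN token is treated as an ordinary function name during type checking. Reconciling the types of IN's arguments is then the job of GenericUDFUtils.ReturnObjectInspectorResolver in the new UDF below; a minimal sketch of what the resolver settles on for the mixed-type test query key IN ("238", 86) (standalone; the class name is hypothetical):

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    public class CommonTypeSketch {
      public static void main(String[] args) throws Exception {
        // Feed the resolver one inspector per argument, as initialize() does.
        GenericUDFUtils.ReturnObjectInspectorResolver resolver =
            new GenericUDFUtils.ReturnObjectInspectorResolver(true);
        resolver.update(PrimitiveObjectInspectorFactory.javaStringObjectInspector); // key
        resolver.update(PrimitiveObjectInspectorFactory.javaStringObjectInspector); // "238"
        resolver.update(PrimitiveObjectInspectorFactory.javaIntObjectInspector);    // 86
        // get() yields an inspector for a type every argument can convert to,
        // which is what makes ObjectInspectorUtils.compare() meaningful here.
        ObjectInspector common = resolver.get();
        System.out.println(common.getTypeName());
      }
    }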
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java	(revision 0)
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BooleanWritable;
+
+/**
+ * GenericUDFIn
+ *
+ * e.g.
+ * SELECT key FROM src WHERE key IN ("238", "1");
+ *
+ * From MySQL page on IN(): To comply with the SQL standard, IN returns NULL
+ * not only if the expression on the left hand side is NULL, but also if no
+ * match is found in the list and one of the expressions in the list is NULL.
+ */
+@Description(name = "in",
+    value = "test _FUNC_(val1, val2...) - returns true if test equals any valN ")
+
+public class GenericUDFIn extends GenericUDF {
+
+  private ObjectInspector[] argumentOIs;
+  BooleanWritable bw = new BooleanWritable();
+
+  ReturnObjectInspectorResolver conversionHelper = null;
+  ObjectInspector compareOI;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+    if (arguments.length < 2) {
+      throw new UDFArgumentLengthException(
+          "The function IN requires at least two arguments, got "
+          + arguments.length);
+    }
+    argumentOIs = arguments;
+
+    // We want to use the ReturnObjectInspectorResolver because otherwise
+    // ObjectInspectorUtils.compare() will return != for two objects that have
+    // different object inspectors, e.g. 238 and "238". The ROIR will help convert
+    // both values to a common type so that they can be compared reasonably.
+    conversionHelper = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+
+    for (ObjectInspector oi : arguments) {
+      if(!conversionHelper.update(oi)) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("The arguments for IN should be the same type! Types are: {");
+        sb.append(arguments[0].getTypeName());
+        sb.append(" IN (");
+        for(int i=1; i<arguments.length; i++) {
+          if (i != 1) {
+            sb.append(", ");
+          }
+          sb.append(arguments[i].getTypeName());
+        }
+        sb.append(")}");
+        throw new UDFArgumentException(sb.toString());
+      }
+    }
+    compareOI = conversionHelper.get();
+    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    bw.set(false);
+
+    // NULL on the left hand side: result is NULL.
+    if (arguments[0].get() == null) {
+      return null;
+    }
+    for (int i = 1; i < arguments.length; i++) {
+      if (ObjectInspectorUtils.compare(
+          conversionHelper.convertIfNecessary(arguments[0].get(),
+              argumentOIs[0]), compareOI,
+          conversionHelper.convertIfNecessary(arguments[i].get(),
+              argumentOIs[i]), compareOI) == 0) {
+        bw.set(true);
+        return bw;
+      }
+    }
+
+    // Nothing matched. Per the SQL standard (see the class comment), a NULL
+    // anywhere in the list makes the result NULL rather than false.
+    for (int i = 1; i < arguments.length; i++) {
+      if (arguments[i].get() == null) {
+        return null;
+      }
+    }
+
+    return bw;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length >= 2);
+    StringBuilder sb = new StringBuilder();
+
+    sb.append("(");
+    sb.append(children[0]);
+    sb.append(") ");
+    sb.append("IN (");
+    for(int i=1; i<children.length; i++) {
+      sb.append(children[i]);
+      if (i+1 != children.length) {
+        sb.append(", ");
+      }
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+}
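For completeness, a hedged usage sketch that drives the new UDF directly, mirroring the positive tests (standalone; the class name is hypothetical, and it assumes the DeferredJavaObject helper nested in GenericUDF):

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    public class GenericUDFInSketch {
      public static void main(String[] args) throws Exception {
        GenericUDFIn in = new GenericUDFIn();
        ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
        // One inspector per argument: the test value first, then the list.
        in.initialize(new ObjectInspector[] { intOI, intOI, intOI, intOI });

        // 1 IN (1, 2, 3) -> BooleanWritable(true)
        System.out.println(in.evaluate(new GenericUDF.DeferredObject[] {
            new GenericUDF.DeferredJavaObject(1),
            new GenericUDF.DeferredJavaObject(1),
            new GenericUDF.DeferredJavaObject(2),
            new GenericUDF.DeferredJavaObject(3) }));

        // 4 IN (1, 2, NULL) -> null: no match and the list holds a NULL
        System.out.println(in.evaluate(new GenericUDF.DeferredObject[] {
            new GenericUDF.DeferredJavaObject(4),
            new GenericUDF.DeferredJavaObject(1),
            new GenericUDF.DeferredJavaObject(2),
            new GenericUDF.DeferredJavaObject(null) }));
      }
    }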