diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java index d6dab78..2363ff3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java @@ -48,9 +48,9 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen throw new UDFArgumentException(udfName + " requires three arguments. Found :" + arguments.length); } - converter1 = checkArguments(arguments, 0); - converter2 = checkArguments(arguments, 1); - converter3 = checkArguments(arguments, 2); + converter1 = checkTextArguments(arguments, 0); + converter2 = checkIntArguments(arguments, 1); + converter3 = checkTextArguments(arguments, 2); return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } @@ -91,31 +91,39 @@ public String getDisplayString(String[] children) { protected abstract void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text str, Text pad); - private Converter checkArguments(ObjectInspector[] arguments, int i) + // Convert input arguments to Text, if necessary. + private Converter checkTextArguments(ObjectInspector[] arguments, int i) throws UDFArgumentException { if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(i + 1, "Only primitive type arguments are accepted but " - + arguments[i].getTypeName() + " is passed. as arguments"); + + arguments[i].getTypeName() + " is passed. as arguments"); + } + + Converter converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i], + PrimitiveObjectInspectorFactory.writableStringObjectInspector); + + return converter; + } + + private Converter checkIntArguments(ObjectInspector[] arguments, int i) + throws UDFArgumentException { + if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(i + 1, "Only primitive type arguments are accepted but " + + arguments[i].getTypeName() + " is passed. as arguments"); } PrimitiveCategory inputType = ((PrimitiveObjectInspector) arguments[i]).getPrimitiveCategory(); Converter converter; switch (inputType) { - case STRING: - case CHAR: - case VARCHAR: - converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i], - PrimitiveObjectInspectorFactory.writableStringObjectInspector); - break; case INT: case SHORT: case BYTE: converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i], - PrimitiveObjectInspectorFactory.writableIntObjectInspector); + PrimitiveObjectInspectorFactory.writableIntObjectInspector); break; default: throw new UDFArgumentTypeException(i + 1, udfName - + " only takes STRING/CHAR/INT/SHORT/BYTE/VARCHAR types as " + (i + 1) + "-ths argument, got " - + inputType); + + " only takes INT/SHORT/BYTE types as " + (i + 1) + "-ths argument, got " + + inputType); } return converter; } diff --git ql/src/test/queries/clientpositive/char_pad_convert.q ql/src/test/queries/clientpositive/char_pad_convert.q new file mode 100644 index 0000000..dad7d15 --- /dev/null +++ ql/src/test/queries/clientpositive/char_pad_convert.q @@ -0,0 +1,70 @@ + +create table over1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over1k' into table over1k; + +-- Pass non-strings for the first and third arguments to test argument conversion + +-- Integers +select lpad(t, 4, ' '), + lpad(si, 2, ' '), + lpad(i, 9, 'z'), + lpad(b, 2, 'a') from over1k limit 5; + +select lpad("oh", 10, t), + lpad("my", 6, si), + lpad("other", 14, i), + lpad("one", 12, b) from over1k limit 5; + +-- Integers +select rpad(t, 4, ' '), + rpad(si, 2, ' '), + rpad(i, 9, 'z'), + rpad(b, 2, 'a') from over1k limit 5; + +select rpad("oh", 10, t), + rpad("my", 6, si), + rpad("other", 14, i), + rpad("one", 12, b) from over1k limit 5; + +-- More +select lpad(f, 4, ' '), + lpad(d, 2, ' '), + lpad(bo, 9, 'z'), + lpad(ts, 2, 'a'), + lpad(dec, 7, 'd'), + lpad(bin, 8, 'b') from over1k limit 5; + +select lpad("oh", 10, f), + lpad("my", 6, d), + lpad("other", 14, bo), + lpad("one", 12, ts), + lpad("two", 7, dec), + lpad("three", 8, bin) from over1k limit 5; + +select rpad(f, 4, ' '), + rpad(d, 2, ' '), + rpad(bo, 9, 'z'), + rpad(ts, 2, 'a'), + rpad(dec, 7, 'd'), + rpad(bin, 8, 'b') from over1k limit 5; + +select rpad("oh", 10, f), + rpad("my", 6, d), + rpad("other", 14, bo), + rpad("one", 12, ts), + rpad("two", 7, dec), + rpad("three", 8, bin) from over1k limit 5; \ No newline at end of file diff --git ql/src/test/results/clientpositive/char_pad_convert.q.out ql/src/test/results/clientpositive/char_pad_convert.q.out new file mode 100644 index 0000000..1f81426 --- /dev/null +++ ql/src/test/results/clientpositive/char_pad_convert.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: create table over1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table over1k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k +PREHOOK: query: load data local inpath '../../data/files/over1k' into table over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: load data local inpath '../../data/files/over1k' into table over1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: -- Pass non-strings for the first and third arguments to test argument conversion + +-- Integers +select lpad(t, 4, ' '), + lpad(si, 2, ' '), + lpad(i, 9, 'z'), + lpad(b, 2, 'a') from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: -- Pass non-strings for the first and third arguments to test argument conversion + +-- Integers +select lpad(t, 4, ' '), + lpad(si, 2, ' '), + lpad(i, 9, 'z'), + lpad(b, 2, 'a') from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### + 124 33 zzzz65664 42 + 19 44 zzzz65553 42 + 35 38 zzzz65619 42 + 111 37 zzzz65656 42 + 54 31 zzzz65547 42 +PREHOOK: query: select lpad("oh", 10, t), + lpad("my", 6, si), + lpad("other", 14, i), + lpad("one", 12, b) from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: select lpad("oh", 10, t), + lpad("my", 6, si), + lpad("other", 14, i), + lpad("one", 12, b) from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +12412412oh 3363my 656646566other 429496743one +19191919oh 4424my 655536555other 429496738one +35353535oh 3873my 656196561other 429496745one +11111111oh 3723my 656566565other 429496731one +54545454oh 3173my 655476554other 429496740one +PREHOOK: query: -- Integers +select rpad(t, 4, ' '), + rpad(si, 2, ' '), + rpad(i, 9, 'z'), + rpad(b, 2, 'a') from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: -- Integers +select rpad(t, 4, ' '), + rpad(si, 2, ' '), + rpad(i, 9, 'z'), + rpad(b, 2, 'a') from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +124 33 65664zzzz 42 +19 44 65553zzzz 42 +35 38 65619zzzz 42 +111 37 65656zzzz 42 +54 31 65547zzzz 42 +PREHOOK: query: select rpad("oh", 10, t), + rpad("my", 6, si), + rpad("other", 14, i), + rpad("one", 12, b) from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: select rpad("oh", 10, t), + rpad("my", 6, si), + rpad("other", 14, i), + rpad("one", 12, b) from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +oh12412412 my3363 other656646566 one429496743 +oh19191919 my4424 other655536555 one429496738 +oh35353535 my3873 other656196561 one429496745 +oh11111111 my3723 other656566565 one429496731 +oh54545454 my3173 other655476554 one429496740 +PREHOOK: query: -- More +select lpad(f, 4, ' '), + lpad(d, 2, ' '), + lpad(bo, 9, 'z'), + lpad(ts, 2, 'a'), + lpad(dec, 7, 'd'), + lpad(bin, 8, 'b') from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: -- More +select lpad(f, 4, ' '), + lpad(d, 2, ' '), + lpad(bo, 9, 'z'), + lpad(ts, 2, 'a'), + lpad(dec, 7, 'd'), + lpad(bin, 8, 'b') from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +74.7 42 zzzzzTRUE 20 ddd45.4 yard du +26.4 37 zzzzzTRUE 20 dd29.62 history +96.9 18 zzzzFALSE 20 dd27.32 history +13.0 34 zzzzFALSE 20 dd23.91 topolog +60.7 2. zzzzFALSE 20 dd90.21 geology +PREHOOK: query: select lpad("oh", 10, f), + lpad("my", 6, d), + lpad("other", 14, bo), + lpad("one", 12, ts), + lpad("two", 7, dec), + lpad("three", 8, bin) from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: select lpad("oh", 10, f), + lpad("my", 6, d), + lpad("other", 14, bo), + lpad("one", 12, ts), + lpad("two", 7, dec), + lpad("three", 8, bin) from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +74.7274.oh 42.4my TRUETRUETother 2013-03-0one 45.4two yathree +26.4326.oh 37.7my TRUETRUETother 2013-03-0one 29.6two hithree +96.9196.oh 18.8my FALSEFALSother 2013-03-0one 27.3two hithree +13.0113.oh 34.9my FALSEFALSother 2013-03-0one 23.9two tothree +60.7160.oh 2.09my FALSEFALSother 2013-03-0one 90.2two gethree +PREHOOK: query: select rpad(f, 4, ' '), + rpad(d, 2, ' '), + rpad(bo, 9, 'z'), + rpad(ts, 2, 'a'), + rpad(dec, 7, 'd'), + rpad(bin, 8, 'b') from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: select rpad(f, 4, ' '), + rpad(d, 2, ' '), + rpad(bo, 9, 'z'), + rpad(ts, 2, 'a'), + rpad(dec, 7, 'd'), + rpad(bin, 8, 'b') from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +74.7 42 TRUEzzzzz 20 45.4ddd yard du +26.4 37 TRUEzzzzz 20 29.62dd history +96.9 18 FALSEzzzz 20 27.32dd history +13.0 34 FALSEzzzz 20 23.91dd topolog +60.7 2. FALSEzzzz 20 90.21dd geology +PREHOOK: query: select rpad("oh", 10, f), + rpad("my", 6, d), + rpad("other", 14, bo), + rpad("one", 12, ts), + rpad("two", 7, dec), + rpad("three", 8, bin) from over1k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +#### A masked pattern was here #### +POSTHOOK: query: select rpad("oh", 10, f), + rpad("my", 6, d), + rpad("other", 14, bo), + rpad("one", 12, ts), + rpad("two", 7, dec), + rpad("three", 8, bin) from over1k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +#### A masked pattern was here #### +oh74.7274. my42.4 otherTRUETRUET one2013-03-0 two45.4 threeya +oh26.4326. my37.7 otherTRUETRUET one2013-03-0 two29.6 threehi +oh96.9196. my18.8 otherFALSEFALS one2013-03-0 two27.3 threehi +oh13.0113. my34.9 otherFALSEFALS one2013-03-0 two23.9 threeto +oh60.7160. my2.09 otherFALSEFALS one2013-03-0 two90.2 threege