From 2f44dcc9bcde397dffa29d7a70542d676ef485d5 Mon Sep 17 00:00:00 2001 From: Madhan Neethiraj Date: Wed, 20 Apr 2016 23:57:55 -0700 Subject: [PATCH] HIVE-13568: UDFs for use in column-masking - includes updates for review comments --- .../test/resources/testconfiguration.properties | 6 + .../hadoop/hive/ql/exec/FunctionRegistry.java | 8 + .../hadoop/hive/ql/udf/generic/BaseMaskUDF.java | 473 +++++++++++++++++++++ .../hadoop/hive/ql/udf/generic/GenericUDFMask.java | 334 +++++++++++++++ .../hive/ql/udf/generic/GenericUDFMaskFirstN.java | 229 ++++++++++ .../hive/ql/udf/generic/GenericUDFMaskHash.java | 77 ++++ .../hive/ql/udf/generic/GenericUDFMaskLastN.java | 193 +++++++++ .../ql/udf/generic/GenericUDFMaskShowFirstN.java | 248 +++++++++++ .../ql/udf/generic/GenericUDFMaskShowLastN.java | 198 +++++++++ ql/src/test/queries/clientpositive/udf_mask.q | 13 + .../test/queries/clientpositive/udf_mask_first_n.q | 13 + ql/src/test/queries/clientpositive/udf_mask_hash.q | 13 + .../test/queries/clientpositive/udf_mask_last_n.q | 13 + .../queries/clientpositive/udf_mask_show_first_n.q | 13 + .../queries/clientpositive/udf_mask_show_last_n.q | 13 + .../results/beelinepositive/show_functions.q.out | 6 + .../results/clientpositive/show_functions.q.out | 12 + ql/src/test/results/clientpositive/udf_mask.q.out | 71 ++++ .../results/clientpositive/udf_mask_first_n.q.out | 68 +++ .../results/clientpositive/udf_mask_hash.q.out | 59 +++ .../results/clientpositive/udf_mask_last_n.q.out | 68 +++ .../clientpositive/udf_mask_show_first_n.q.out | 68 +++ .../clientpositive/udf_mask_show_last_n.q.out | 68 +++ 23 files changed, 2264 insertions(+) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java create mode 100644 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java create mode 100644 ql/src/test/queries/clientpositive/udf_mask.q create mode 100644 ql/src/test/queries/clientpositive/udf_mask_first_n.q create mode 100644 ql/src/test/queries/clientpositive/udf_mask_hash.q create mode 100644 ql/src/test/queries/clientpositive/udf_mask_last_n.q create mode 100644 ql/src/test/queries/clientpositive/udf_mask_show_first_n.q create mode 100644 ql/src/test/queries/clientpositive/udf_mask_show_last_n.q create mode 100644 ql/src/test/results/clientpositive/udf_mask.q.out create mode 100644 ql/src/test/results/clientpositive/udf_mask_first_n.q.out create mode 100644 ql/src/test/results/clientpositive/udf_mask_hash.q.out create mode 100644 ql/src/test/results/clientpositive/udf_mask_last_n.q.out create mode 100644 ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out create mode 100644 ql/src/test/results/clientpositive/udf_mask_show_last_n.q.out diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 0ef3161..b9e8cf8 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -659,6 +659,12 @@ beeline.positive.exclude=add_part_exist.q,\ udf_map.q,\ udf_map_keys.q,\ udf_map_values.q,\ + udf_mask.q,\ + udf_mask_first_n.q,\ + udf_mask_hash.q,\ + udf_mask_last_n.q,\ + udf_mask_show_first_n.q,\ + udf_mask_show_last_n.q,\ udf_max.q,\ udf_min.q,\ udf_named_struct.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 1343b39..00df3a0 
100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -474,6 +474,14 @@ system.registerHiddenBuiltIn(GenericUDFOPDTIPlus.class); system.registerHiddenBuiltIn(GenericUDFOPNumericMinus.class); system.registerHiddenBuiltIn(GenericUDFOPNumericPlus.class); + + // mask UDFs + system.registerGenericUDF(GenericUDFMask.UDF_NAME, GenericUDFMask.class); + system.registerGenericUDF(GenericUDFMaskFirstN.UDF_NAME, GenericUDFMaskFirstN.class); + system.registerGenericUDF(GenericUDFMaskLastN.UDF_NAME, GenericUDFMaskLastN.class); + system.registerGenericUDF(GenericUDFMaskShowFirstN.UDF_NAME, GenericUDFMaskShowFirstN.class); + system.registerGenericUDF(GenericUDFMaskShowLastN.UDF_NAME, GenericUDFMaskShowLastN.class); + system.registerGenericUDF(GenericUDFMaskHash.UDF_NAME, GenericUDFMaskHash.class); } public static String getNormalizedFunctionName(String fn) throws SemanticException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java new file mode 100644 index 0000000..343f332 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java @@ -0,0 +1,473 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.*; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +import java.sql.Date; + + +public abstract class BaseMaskUDF extends GenericUDF { + private static final Log LOG = LogFactory.getLog(BaseMaskUDF.class); + + final protected AbstractTransformer transformer; + final protected String displayName; + protected AbstractTransformerAdapter transformerAdapter = null; + + protected BaseMaskUDF(AbstractTransformer transformer, String displayName) { + this.transformer = transformer; + this.displayName = displayName; + } + + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + LOG.debug("==> BaseMaskUDF.initialize()"); + + checkArgPrimitive(arguments, 0); // first argument is the column to be transformed + + PrimitiveObjectInspector columnType = ((PrimitiveObjectInspector) arguments[0]); + + transformer.init(arguments, 1); + + transformerAdapter = AbstractTransformerAdapter.getTransformerAdapter(columnType, transformer); + + ObjectInspector ret = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(columnType.getPrimitiveCategory()); + + 
LOG.debug("<== BaseMaskUDF.initialize()"); + + return ret; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object ret = transformerAdapter.getTransformedWritable(arguments[0]); + + return ret; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString(displayName, children); + } +} + + +/** + * Base class to be extended by transformers, which transform a given value according to their specification. + */ +abstract class AbstractTransformer { + /** + * Initialize the transformer object + * @param arguments arguments given to GenericUDF.initialize() + * @param startIdx index into array, from which the transformer should read values + */ + abstract void init(ObjectInspector[] arguments, int startIdx); + + /** + * Transform a String value + * @param value value to transform + * @return transformed value + */ + abstract String transform(String value); + + /** + * Transform a Byte value + * @param value value to transform + * @return transformed value + */ + abstract Byte transform(Byte value); + + /** + * Transform a Short value + * @param value value to transform + * @return transformed value + */ + abstract Short transform(Short value); + + /** + * Transform an Integer value + * @param value value to transform + * @return transformed value + */ + abstract Integer transform(Integer value); + + /** + * Transform a Long value + * @param value value to transform + * @return transformed value + */ + abstract Long transform(Long value); + + /** + * Transform a Date value + * @param value value to transform + * @return transformed value + */ + abstract Date transform(Date value); +} + +/** + * Base class to be extended by datatype specific adapters that handle the necessary conversion of the transformed value + * into the appropriate Writable object, which GenericUDF.evaluate() is expected to return. 
+ */ +abstract class AbstractTransformerAdapter { + final AbstractTransformer transformer; + + AbstractTransformerAdapter(AbstractTransformer transformer) { + this.transformer = transformer; + } + + abstract Object getTransformedWritable(DeferredObject value) throws HiveException; + + static AbstractTransformerAdapter getTransformerAdapter(PrimitiveObjectInspector columnType, AbstractTransformer transformer) { + final AbstractTransformerAdapter ret; + + switch(columnType.getPrimitiveCategory()) { + case STRING: + ret = new StringTransformerAdapter((StringObjectInspector)columnType, transformer); + break; + + case CHAR: + ret = new HiveCharTransformerAdapter((HiveCharObjectInspector)columnType, transformer); + break; + + case VARCHAR: + ret = new HiveVarcharTransformerAdapter((HiveVarcharObjectInspector)columnType, transformer); + break; + + case BYTE: + ret = new ByteTransformerAdapter((ByteObjectInspector)columnType, transformer); + break; + + case SHORT: + ret = new ShortTransformerAdapter((ShortObjectInspector)columnType, transformer); + break; + + case INT: + ret = new IntegerTransformerAdapter((IntObjectInspector)columnType, transformer); + break; + + case LONG: + ret = new LongTransformerAdapter((LongObjectInspector)columnType, transformer); + break; + + case DATE: + ret = new DateTransformerAdapter((DateObjectInspector)columnType, transformer); + break; + + default: + ret = new UnsupportedDatatypeTransformAdapter(columnType, transformer); + break; + } + + return ret; + } +} + +class ByteTransformerAdapter extends AbstractTransformerAdapter { + final ByteObjectInspector columnType; + final ByteWritable writable; + + public ByteTransformerAdapter(ByteObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new ByteWritable()); + } + + public ByteTransformerAdapter(ByteObjectInspector columnType, AbstractTransformer transformer, ByteWritable writable) { + super(transformer); + + this.columnType = columnType; + this.writable 
= writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + Byte value = (Byte)columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { + Byte transformedValue = transformer.transform(value); + + if(transformedValue != null) { + writable.set(transformedValue); + + return writable; + } + } + + return null; + } +} + +class DateTransformerAdapter extends AbstractTransformerAdapter { + final DateObjectInspector columnType; + final DateWritable writable; + + public DateTransformerAdapter(DateObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new DateWritable()); + } + + public DateTransformerAdapter(DateObjectInspector columnType, AbstractTransformer transformer, DateWritable writable) { + super(transformer); + + this.columnType = columnType; + this.writable = writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + Date value = columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { + Date transformedValue = transformer.transform(value); + + if(transformedValue != null) { + writable.set(transformedValue); + + return writable; + } + } + + return null; + } +} + +class HiveCharTransformerAdapter extends AbstractTransformerAdapter { + final HiveCharObjectInspector columnType; + final HiveCharWritable writable; + + public HiveCharTransformerAdapter(HiveCharObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new HiveCharWritable()); + } + + public HiveCharTransformerAdapter(HiveCharObjectInspector columnType, AbstractTransformer transformer, HiveCharWritable writable) { + super(transformer); + + this.columnType = columnType; + this.writable = writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + HiveChar value = columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { 
+ String transformedValue = transformer.transform(value.getValue()); + + if(transformedValue != null) { + writable.set(transformedValue); + + return writable; + } + } + + return null; + } +} + +class HiveVarcharTransformerAdapter extends AbstractTransformerAdapter { + final HiveVarcharObjectInspector columnType; + final HiveVarcharWritable writable; + + public HiveVarcharTransformerAdapter(HiveVarcharObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new HiveVarcharWritable()); + } + + public HiveVarcharTransformerAdapter(HiveVarcharObjectInspector columnType, AbstractTransformer transformer, HiveVarcharWritable writable) { + super(transformer); + + this.columnType = columnType; + this.writable = writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + HiveVarchar value = columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { + String transformedValue = transformer.transform(value.getValue()); + + if(transformedValue != null) { + writable.set(transformedValue); + + return writable; + } + } + + return null; + } +} + +class IntegerTransformerAdapter extends AbstractTransformerAdapter { + final IntObjectInspector columnType; + final IntWritable writable; + + public IntegerTransformerAdapter(IntObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new IntWritable()); + } + + public IntegerTransformerAdapter(IntObjectInspector columnType, AbstractTransformer transformer, IntWritable writable) { + super(transformer); + + this.columnType = columnType; + this.writable = writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + Integer value = (Integer)columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { + Integer transformedValue = transformer.transform(value); + + if(transformedValue != null) { + writable.set(transformedValue); + + return 
writable; + } + } + + return null; + } +} + +class LongTransformerAdapter extends AbstractTransformerAdapter { + final LongObjectInspector columnType; + final LongWritable writable; + + public LongTransformerAdapter(LongObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new LongWritable()); + } + + public LongTransformerAdapter(LongObjectInspector columnType, AbstractTransformer transformer, LongWritable writable) { + super(transformer); + + this.columnType = columnType; + this.writable = writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + Long value = (Long)columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { + Long transformedValue = transformer.transform(value); + + if(transformedValue != null) { + writable.set(transformedValue); + + return writable; + } + } + + return null; + } +} + +class ShortTransformerAdapter extends AbstractTransformerAdapter { + final ShortObjectInspector columnType; + final ShortWritable writable; + + public ShortTransformerAdapter(ShortObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new ShortWritable()); + } + + public ShortTransformerAdapter(ShortObjectInspector columnType, AbstractTransformer transformer, ShortWritable writable) { + super(transformer); + + this.columnType = columnType; + this.writable = writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + Short value = (Short)columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { + Short transformedValue = transformer.transform(value); + + if(transformedValue != null) { + writable.set(transformedValue); + + return writable; + } + } + + return null; + } +} + +class StringTransformerAdapter extends AbstractTransformerAdapter { + final StringObjectInspector columnType; + final Text writable; + + public 
StringTransformerAdapter(StringObjectInspector columnType, AbstractTransformer transformer) { + this(columnType, transformer, new Text()); + } + + public StringTransformerAdapter(StringObjectInspector columnType, AbstractTransformer transformer, Text writable) { + super(transformer); + + this.columnType = columnType; + this.writable = writable; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + String value = columnType.getPrimitiveJavaObject(object.get()); + + if(value != null) { + String transformedValue = transformer.transform(value); + + if(transformedValue != null) { + writable.set(transformedValue); + + return writable; + } + } + + return null; + } +} + +class UnsupportedDatatypeTransformAdapter extends AbstractTransformerAdapter { + final PrimitiveObjectInspector columnType; + + public UnsupportedDatatypeTransformAdapter(PrimitiveObjectInspector columnType, AbstractTransformer transformer) { + super(transformer); + + this.columnType = columnType; + } + + @Override + public Object getTransformedWritable(DeferredObject object) throws HiveException { + return null; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java new file mode 100644 index 0000000..cd35142 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java @@ -0,0 +1,334 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + + +import java.sql.Date; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; + + +@Description(name = "mask", + value = "masks the given value", + extended = "Examples:\n " + + " mask(ccn)\n " + + " mask(ccn, 'X', 'x', '0')\n " + + " mask(ccn, 'x', 'x', 'x')\n " + + "Arguments:\n " + + " mask(value, upperChar, lowerChar, digitChar, otherChar, numberChar, dayValue, monthValue, yearValue)\n " + + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR, DATE\n " + + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n " + + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n " + + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n " + + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n " + + " numberChar - character to replace digits in a number with. Valid values: 0-9. 
Default value: '1'\n " + + " dayValue - value to replace day field in a date with. Specify -1 to retain original value. Valid values: 1-31. Default value: 1\n " + + " monthValue - value to replace month field in a date with. Specify -1 to retain original value. Valid values: 0-11. Default value: 0\n " + + " yearValue - value to replace year field in a date with. Specify -1 to retain original value. Default value: 0\n " + ) +public class GenericUDFMask extends BaseMaskUDF { + public static final String UDF_NAME = "mask"; + + public GenericUDFMask() { + super(new MaskTransformer(), UDF_NAME); + } +} + +class MaskTransformer extends AbstractTransformer { + final static int MASKED_UPPERCASE = 'X'; + final static int MASKED_LOWERCASE = 'x'; + final static int MASKED_DIGIT = 'n'; + final static int MASKED_OTHER_CHAR = -1; + final static int MASKED_NUMBER = 1; + final static int MASKED_DAY_COMPONENT_VAL = 1; + final static int MASKED_MONTH_COMPONENT_VAL = 0; + final static int MASKED_YEAR_COMPONENT_VAL = 0; + final static int UNMASKED_VAL = -1; + + int maskedUpperChar = MASKED_UPPERCASE; + int maskedLowerChar = MASKED_LOWERCASE; + int maskedDigitChar = MASKED_DIGIT; + int maskedOtherChar = MASKED_OTHER_CHAR; + int maskedNumber = MASKED_NUMBER; + int maskedDayValue = MASKED_DAY_COMPONENT_VAL; + int maskedMonthValue = MASKED_MONTH_COMPONENT_VAL; + int maskedYearValue = MASKED_YEAR_COMPONENT_VAL; + + public MaskTransformer() { + } + + @Override + public void init(ObjectInspector[] arguments, int startIdx) { + int idx = startIdx; + + maskedUpperChar = getCharArg(arguments, idx++, MASKED_UPPERCASE); + maskedLowerChar = getCharArg(arguments, idx++, MASKED_LOWERCASE); + maskedDigitChar = getCharArg(arguments, idx++, MASKED_DIGIT); + maskedOtherChar = getCharArg(arguments, idx++, MASKED_OTHER_CHAR); + maskedNumber = getIntArg(arguments, idx++, MASKED_NUMBER); + maskedDayValue = getIntArg(arguments, idx++, MASKED_DAY_COMPONENT_VAL); + maskedMonthValue = getIntArg(arguments, idx++, 
MASKED_MONTH_COMPONENT_VAL); + maskedYearValue = getIntArg(arguments, idx++, MASKED_YEAR_COMPONENT_VAL); + + if(maskedNumber < 0 || maskedNumber > 9) { + maskedNumber = MASKED_NUMBER; + } + + /* UNMASKED_VAL (-1) means "retain the original day", so it must survive the range check; only other out-of-range values fall back to the default */ + if(maskedDayValue != UNMASKED_VAL && (maskedDayValue < 1 || maskedDayValue > 31)) { + maskedDayValue = MASKED_DAY_COMPONENT_VAL; + } + + /* same for the month: keep UNMASKED_VAL so that transform(Date) can retain the original month */ + if(maskedMonthValue != UNMASKED_VAL && (maskedMonthValue < 0 || maskedMonthValue > 11)) { + maskedMonthValue = MASKED_MONTH_COMPONENT_VAL; + } + } + + /** + * Passes every char of the given string through transformChar() and joins the results. + * NOTE(review): the string is walked char by char, so the two halves of a surrogate pair are classified separately. + * @param val string to mask + * @return string built from the transformed chars + */ + @Override + String transform(final String val) { + StringBuilder ret = new StringBuilder(val.length()); + + for(int i = 0; i < val.length(); i++) { + ret.appendCodePoint(transformChar(val.charAt(i))); + } + + return ret.toString(); + } + + /** + * Replaces each decimal digit of the value with maskedNumber, keeping the sign; 0 is returned as 0. + * NOTE(review): the result is accumulated in a byte, so it is narrowed silently when the masked digits do not fit. + * @param value value to mask + * @return masked value + */ + @Override + Byte transform(final Byte value) { + byte val = value; + + if(value < 0) { + val *= -1; + } + + byte ret = 0; + int pos = 1; + while(val != 0) { + ret += maskedNumber * pos; + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } + + /** + * Replaces each decimal digit of the value with maskedNumber, keeping the sign; 0 is returned as 0. + * NOTE(review): the result is accumulated in a short, so it is narrowed silently when the masked digits do not fit. + * @param value value to mask + * @return masked value + */ + @Override + Short transform(final Short value) { + short val = value; + + if(value < 0) { + val *= -1; + } + + short ret = 0; + int pos = 1; + while(val != 0) { + ret += maskedNumber * pos; + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } + + /** + * Replaces each decimal digit of the value with maskedNumber, keeping the sign; 0 is returned as 0. + * @param value value to mask + * @return masked value + */ + @Override + Integer transform(final Integer value) { + int val = value; + + if(value < 0) { + val *= -1; + } + + int ret = 0; + int pos = 1; + while(val != 0) { + ret += maskedNumber * pos; + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } + + /** + * Replaces each decimal digit of the value with maskedNumber, keeping the sign; 0 is returned as 0. + * @param value value to mask + * @return masked value + */ + @Override + Long transform(final Long value) { + long val = value; + + if(value < 0) { + val *= -1; + } + + long ret = 0; + long pos = 1; + for(int i = 0; val != 0; i++) { + ret += maskedNumber * pos; + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } + + /** + * Replaces the year, month and day fields of the date with the configured values; a field configured as + * UNMASKED_VAL keeps its original value. + * @param value date to mask + * @return masked date + */ + @Override + Date transform(final Date value) { + int year = maskedYearValue == UNMASKED_VAL ? value.getYear() : maskedYearValue; + int month = maskedMonthValue == UNMASKED_VAL ? 
value.getMonth() : maskedMonthValue; + int day = maskedDayValue == UNMASKED_VAL ? value.getDate() : maskedDayValue; + + return new Date(year, month, day); + } + + protected int transformChar(final int c) { + switch(Character.getType(c)) { + case Character.UPPERCASE_LETTER: + if(maskedUpperChar != UNMASKED_VAL) { + return maskedUpperChar; + } + break; + + case Character.LOWERCASE_LETTER: + if(maskedLowerChar != UNMASKED_VAL) { + return maskedLowerChar; + } + break; + + case Character.DECIMAL_DIGIT_NUMBER: + if(maskedDigitChar != UNMASKED_VAL) { + return maskedDigitChar; + } + break; + + default: + if(maskedOtherChar != UNMASKED_VAL) { + return maskedOtherChar; + } + break; + } + + return c; + } + + int getCharArg(ObjectInspector[] arguments, int index, int defaultValue) { + int ret = defaultValue; + + ObjectInspector arg = (arguments != null && arguments.length > index) ? arguments[index] : null; + + if (arg != null) { + if(arg instanceof WritableConstantIntObjectInspector) { + IntWritable value = ((WritableConstantIntObjectInspector)arg).getWritableConstantValue(); + + if(value != null) { + ret = value.get(); + } + } else if(arg instanceof WritableConstantLongObjectInspector) { + LongWritable value = ((WritableConstantLongObjectInspector)arg).getWritableConstantValue(); + + if(value != null) { + ret = (int)value.get(); + } + } else if(arg instanceof WritableConstantShortObjectInspector) { + ShortWritable value = ((WritableConstantShortObjectInspector)arg).getWritableConstantValue(); + + if(value != null) { + ret = value.get(); + } + } else if(arg instanceof ConstantObjectInspector) { + Object value = ((ConstantObjectInspector) arg).getWritableConstantValue(); + + if (value != null) { + String strValue = value.toString(); + + if (strValue != null && strValue.length() > 0) { + ret = strValue.charAt(0); + } + } + } + } + + return ret; + } + + int getIntArg(ObjectInspector[] arguments, int index, int defaultValue) { + int ret = defaultValue; + + ObjectInspector arg = 
(arguments != null && arguments.length > index) ? arguments[index] : null; + + if (arg != null) { + if (arg instanceof WritableConstantIntObjectInspector) { + IntWritable value = ((WritableConstantIntObjectInspector) arg).getWritableConstantValue(); + + if (value != null) { + ret = value.get(); + } + } else if (arg instanceof WritableConstantLongObjectInspector) { + LongWritable value = ((WritableConstantLongObjectInspector) arg).getWritableConstantValue(); + + if (value != null) { + ret = (int) value.get(); + } + } else if (arg instanceof WritableConstantShortObjectInspector) { + ShortWritable value = ((WritableConstantShortObjectInspector) arg).getWritableConstantValue(); + + if (value != null) { + ret = value.get(); + } + } else if (arg instanceof ConstantObjectInspector) { + Object value = ((ConstantObjectInspector) arg).getWritableConstantValue(); + + if (value != null) { + String strValue = value.toString(); + + if (strValue != null && strValue.length() > 0) { + ret = Integer.parseInt(value.toString()); + } + } + } + } + + return ret; + } +} + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java new file mode 100644 index 0000000..1ecf86f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + + +/** + * UDF that masks the first n characters of a value. The text in the annotation below is the user-facing help, + * so the signature shown under "Arguments" must use this function's own name, mask_first_n. + */ +@Description(name = "mask_first_n", + value = "masks the first n characters of the value", + extended = "Examples:\n " + + " mask_first_n(ccn, 8)\n " + + " mask_first_n(ccn, 8, 'x', 'x', 'x')\n " + + "Arguments:\n " + + " mask_first_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n " + + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n " + + " charCount - number of characters. Default value: 4\n " + + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n " + + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n " + + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n " + + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n " + + " numberChar - character to replace digits in a number with. Valid values: 0-9. 
Default value: '1'\n " + ) +public class GenericUDFMaskFirstN extends BaseMaskUDF { + public static final String UDF_NAME = "mask_first_n"; + + public GenericUDFMaskFirstN() { + super(new MaskFirstNTransformer(), UDF_NAME); + } +} + +class MaskFirstNTransformer extends MaskTransformer { + int charCount = 4; + + public MaskFirstNTransformer() { + super(); + } + + @Override + public void init(ObjectInspector[] arguments, int argsStartIdx) { + super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed in this method below + + charCount = getIntArg(arguments, argsStartIdx, 4); + + if(charCount < 0) { + charCount = 0; + } + } + + @Override + String transform(final String value) { + final StringBuilder ret = new StringBuilder(value.length()); + final int endIdx = value.length() < charCount ? value.length() : charCount; + + for(int i = 0; i < endIdx; i++) { + ret.appendCodePoint(transformChar(value.charAt(i))); + } + + for(int i = endIdx; i < value.length(); i++) { + ret.appendCodePoint(value.charAt(i)); + } + + return ret.toString(); + } + + @Override + Byte transform(final Byte value) { + byte val = value; + + if(value < 0) { + val *= -1; + } + + // count number of digits in the value + int digitCount = 0; + for(byte v = val; v != 0; v /= 10) { + digitCount++; + } + + // number of digits to retain from the end + final int retainCount = digitCount < charCount ? 
0 : (digitCount - charCount); + + byte ret = 0; + int pos = 1; + for(int i = 0; val != 0; i++) { + if(i >= retainCount) { // mask this digit + ret += maskedNumber * pos; + } else { //retain this digit + ret += (val % 10) * pos; + } + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } + + @Override + Short transform(final Short value) { + short val = value; + + if(value < 0) { + val *= -1; + } + + // count number of digits in the value + int digitCount = 0; + for(short v = val; v != 0; v /= 10) { + digitCount++; + } + + // number of digits to retain from the end + final int retainCount = digitCount < charCount ? 0 : (digitCount - charCount); + + short ret = 0; + int pos = 1; + for(int i = 0; val != 0; i++) { + if(i >= retainCount) { // mask this digit + ret += maskedNumber * pos; + } else { // retain this digit + ret += (val % 10) * pos; + } + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } + + @Override + Integer transform(final Integer value) { + int val = value; + + if(value < 0) { + val *= -1; + } + + // count number of digits in the value + int digitCount = 0; + for(int v = val; v != 0; v /= 10) { + digitCount++; + } + + // number of digits to retain from the end + final int retainCount = digitCount < charCount ? 0 : (digitCount - charCount); + + int ret = 0; + int pos = 1; + for(int i = 0; val != 0; i++) { + if(i >= retainCount) { // mask this digit + ret += maskedNumber * pos; + } else { // retain this digit + ret += (val % 10) * pos; + } + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } + + @Override + Long transform(final Long value) { + long val = value; + + if(value < 0) { + val *= -1; + } + + // count number of digits in the value + int digitCount = 0; + for(long v = val; v != 0; v /= 10) { + digitCount++; + } + + // number of digits to retain from the end + final int retainCount = digitCount < charCount ? 
0 : (digitCount - charCount); + + long ret = 0; + long pos = 1; + for(int i = 0; val != 0; i++) { + if(i >= retainCount) { // mask this digit + ret += maskedNumber * pos; + } else { // retain this digit + ret += (val % 10) * pos; + } + + val /= 10; + pos *= 10; + } + + if(value < 0) { + ret *= -1; + } + + return ret; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java new file mode 100644 index 0000000..c456f43 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.sql.Date; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + + +@Description(name = "mask_hash", + value = "returns hash of the given value", + extended = "Examples:\n " + + " mask_hash(value)\n " + + "Arguments:\n " + + " value - value to mask. 
Supported types: STRING, VARCHAR, CHAR" + ) +public class GenericUDFMaskHash extends BaseMaskUDF { + public static final String UDF_NAME = "mask_hash"; + + public GenericUDFMaskHash() { + super(new MaskHashTransformer(), UDF_NAME); + } +} + +class MaskHashTransformer extends AbstractTransformer { + @Override + public void init(ObjectInspector[] arguments, int startIdx) { + } + + @Override + String transform(final String value) { + return DigestUtils.md5Hex(value); + } + + @Override + Byte transform(final Byte value) { + return null; + } + + @Override + Short transform(final Short value) { + return null; + } + + @Override + Integer transform(final Integer value) { + return null; + } + + @Override + Long transform(final Long value) { + return null; + } + + @Override + Date transform(final Date value) { + return null; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java new file mode 100644 index 0000000..2e867bc --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+@Description(name = "mask_last_n",
+             value = "masks the last n characters of the value",
+             extended = "Examples:\n "
+                      + " mask_last_n(ccn, 8)\n "
+                      + " mask_last_n(ccn, 8, 'x', 'x', 'x')\n "
+                      + "Arguments:\n "
+                      + " mask_last_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n "
+                      + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n "
+                      + " charCount - number of characters. Default value: 4\n "
+                      + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+                      + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+                      + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+                      + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+                      + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+           )
+public class GenericUDFMaskLastN extends BaseMaskUDF { // UDF "mask_last_n": wires MaskLastNTransformer into BaseMaskUDF
+  public static final String UDF_NAME = "mask_last_n";
+
+  public GenericUDFMaskLastN() {
+    super(new MaskLastNTransformer(), UDF_NAME);
+  }
+}
+
+class MaskLastNTransformer extends MaskTransformer { // masks the trailing charCount characters (strings) or least significant digits (numbers)
+  int charCount = 4; // how many trailing characters/digits are masked; never negative after init()
+
+  public MaskLastNTransformer() {
+    super();
+  }
+
+  @Override
+  public void init(ObjectInspector[] arguments, int argsStartIdx) {
+    super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed in this method below
+
+    charCount = getIntArg(arguments, argsStartIdx, 4); // 4 is the fallback passed to getIntArg
+
+    if(charCount < 0) { // a negative count is treated as "mask nothing"
+      charCount = 0;
+    }
+  }
+
+  @Override
+  String transform(final String value) { // chars before startIdx are copied unchanged, the rest go through transformChar
+    final StringBuilder ret = new StringBuilder(value.length());
+    final int startIdx = value.length() <= charCount ? 0 : (value.length() - charCount);
+
+    for(int i = 0; i < startIdx; i++) { // NOTE(review): charAt() indexes UTF-16 code units, so a surrogate pair counts as two characters
+      ret.appendCodePoint(value.charAt(i));
+    }
+
+    for(int i = startIdx; i < value.length(); i++) {
+      ret.appendCodePoint(transformChar(value.charAt(i)));
+    }
+
+    return ret.toString();
+  }
+
+  @Override
+  Byte transform(final Byte value) { // replaces the charCount least significant decimal digits with maskedNumber; 0 is returned as 0
+    byte val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    byte ret = 0; // compound assignments below narrow to byte, so a masked value beyond the byte range wraps
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < charCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += Math.abs(val % 10) * pos; // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Short transform(final Short value) { // same digit masking as the Byte overload, narrowed to short
+    short val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    short ret = 0; // compound assignments below narrow to short, so a masked value beyond the short range wraps
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < charCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += Math.abs(val % 10) * pos; // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Integer transform(final Integer value) { // same digit masking as the Byte overload, in int arithmetic
+    int val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    int ret = 0;
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < charCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += Math.abs(val % 10) * pos; // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Long transform(final Long value) { // same digit masking as the Byte overload, in long arithmetic
+    long val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    long ret = 0;
+    long pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < charCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += Math.abs(val % 10) * pos; // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java
new file mode 100644
index 0000000..a9a1766
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+@Description(name = "mask_show_first_n",
+             value = "masks all but first n characters of the value",
+             extended = "Examples:\n "
+                      + " mask_show_first_n(ccn, 8)\n "
+                      + " mask_show_first_n(ccn, 8, 'x', 'x', 'x')\n "
+                      + "Arguments:\n "
+                      + " mask_show_first_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n "
+                      + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n "
+                      + " charCount - number of characters. Default value: 4\n "
+                      + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+                      + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+                      + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+                      + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+                      + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+           )
+public class GenericUDFMaskShowFirstN extends BaseMaskUDF { // UDF "mask_show_first_n": wires MaskShowFirstNTransformer into BaseMaskUDF
+  public static final String UDF_NAME = "mask_show_first_n";
+
+  public GenericUDFMaskShowFirstN() {
+    super(new MaskShowFirstNTransformer(), UDF_NAME);
+  }
+}
+
+class MaskShowFirstNTransformer extends MaskTransformer { // keeps the leading charCount characters/digits and masks everything after them
+  int charCount = 4; // how many leading characters/digits stay visible; never negative after init()
+
+  public MaskShowFirstNTransformer() {
+    super();
+  }
+
+  @Override
+  public void init(ObjectInspector[] arguments, int argsStartIdx) {
+    super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed here
+
+    charCount = getIntArg(arguments, argsStartIdx, 4); // 4 is the fallback passed to getIntArg
+
+    if(charCount < 0) { // a negative count is treated as "show nothing"
+      charCount = 0;
+    }
+  }
+
+  @Override
+  String transform(final String value) { // first charCount chars are copied unchanged, the rest go through transformChar
+    if(value.length() <= charCount) { // nothing beyond the visible prefix: return the input itself
+      return value;
+    }
+
+    final StringBuilder ret = new StringBuilder(value.length());
+
+    for(int i = 0; i < charCount; i++) { // NOTE(review): charAt() indexes UTF-16 code units, so a surrogate pair counts as two characters
+      ret.appendCodePoint(value.charAt(i));
+    }
+
+    for(int i = charCount; i < value.length(); i++) {
+      ret.appendCodePoint(transformChar(value.charAt(i)));
+    }
+
+    return ret.toString();
+  }
+
+  @Override
+  Byte transform(final Byte value) { // keeps the charCount most significant decimal digits, replaces the rest with maskedNumber
+    byte val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    // count number of digits in the value
+    int digitCount = 0;
+    for(byte v = val; v != 0; v /= 10) {
+      digitCount++;
+    }
+
+    // number of digits to mask from the end
+    final int maskCount = digitCount - charCount;
+
+    if(maskCount <= 0) { // every digit is within the visible prefix: return the input unchanged
+      return value;
+    }
+
+    byte ret = 0; // compound assignments below narrow to byte, so a masked value beyond the byte range wraps
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < maskCount) { // mask this digit
+        ret += (maskedNumber * pos);
+      } else { // retain this digit
+        ret += (Math.abs(val % 10) * pos); // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Short transform(final Short value) { // same digit masking as the Byte overload, narrowed to short
+    short val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    // count number of digits in the value
+    int digitCount = 0;
+    for(short v = val; v != 0; v /= 10) {
+      digitCount++;
+    }
+
+    // number of digits to mask from the end
+    final int maskCount = digitCount - charCount;
+
+    if(maskCount <= 0) { // every digit is within the visible prefix: return the input unchanged
+      return value;
+    }
+
+    short ret = 0; // compound assignments below narrow to short, so a masked value beyond the short range wraps
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < maskCount) { // mask this digit
+        ret += (maskedNumber * pos);
+      } else { // retain this digit
+        ret += (Math.abs(val % 10) * pos); // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Integer transform(final Integer value) { // same digit masking as the Byte overload, in int arithmetic
+    int val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    // count number of digits in the value
+    int digitCount = 0;
+    for(int v = val; v != 0; v /= 10) {
+      digitCount++;
+    }
+
+    // number of digits to mask from the end
+    final int maskCount = digitCount - charCount;
+
+    if(maskCount <= 0) { // every digit is within the visible prefix: return the input unchanged
+      return value;
+    }
+
+    int ret = 0;
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < maskCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += (Math.abs(val % 10) * pos); // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Long transform(final Long value) { // same digit masking as the Byte overload, in long arithmetic
+    long val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    // count number of digits in the value
+    int digitCount = 0;
+    for(long v = val; v != 0; v /= 10) {
+      digitCount++;
+    }
+
+    // number of digits to mask from the end
+    final int maskCount = digitCount - charCount;
+
+    if(maskCount <= 0) { // every digit is within the visible prefix: return the input unchanged
+      return value;
+    }
+
+    long ret = 0;
+    long pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i < maskCount) { // mask this digit
+        ret += (maskedNumber * pos);
+      } else { // retain this digit
+        ret += (Math.abs(val % 10) * pos); // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java
new file mode 100644
index 0000000..2d05319
--- /dev/null
+++
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+@Description(name = "mask_show_last_n",
+             value = "masks all but last n characters of the value",
+             extended = "Examples:\n "
+                      + " mask_show_last_n(ccn, 8)\n "
+                      + " mask_show_last_n(ccn, 8, 'x', 'x', 'x')\n "
+                      + "Arguments:\n "
+                      + " mask_show_last_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n "
+                      + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n "
+                      + " charCount - number of characters. Default value: 4\n "
+                      + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+                      + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+                      + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+                      + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+                      + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+           )
+public class GenericUDFMaskShowLastN extends BaseMaskUDF { // UDF "mask_show_last_n": wires MaskShowLastNTransformer into BaseMaskUDF
+  public static final String UDF_NAME = "mask_show_last_n";
+
+  public GenericUDFMaskShowLastN() {
+    super(new MaskShowLastNTransformer(), UDF_NAME);
+  }
+}
+
+class MaskShowLastNTransformer extends MaskTransformer { // keeps the trailing charCount characters/digits and masks everything before them
+  int charCount = 4; // how many trailing characters/digits stay visible; never negative after init()
+
+  public MaskShowLastNTransformer() {
+    super();
+  }
+
+  @Override
+  public void init(ObjectInspector[] arguments, int argsStartIdx) {
+    super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed in this method below
+
+    charCount = getIntArg(arguments, argsStartIdx, 4); // 4 is the fallback passed to getIntArg
+
+    if(charCount < 0) { // a negative count is treated as "show nothing"
+      charCount = 0;
+    }
+  }
+
+
+  @Override
+  String transform(final String value) { // chars before endIdx go through transformChar, the last charCount are copied unchanged
+    if(value.length() <= charCount) { // nothing before the visible suffix: return the input itself
+      return value;
+    }
+
+    final StringBuilder ret = new StringBuilder(value.length());
+    final int endIdx = value.length() - charCount;
+
+    for(int i = 0; i < endIdx; i++) { // NOTE(review): charAt() indexes UTF-16 code units, so a surrogate pair counts as two characters
+      ret.appendCodePoint(transformChar(value.charAt(i)));
+    }
+
+    for(int i = endIdx; i < value.length(); i++) {
+      ret.appendCodePoint(value.charAt(i));
+    }
+
+    return ret.toString();
+  }
+
+  @Override
+  Byte transform(final Byte value) { // keeps the charCount least significant decimal digits, replaces the rest with maskedNumber; 0 is returned as 0
+    byte val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    byte ret = 0; // compound assignments below narrow to byte, so a masked value beyond the byte range wraps
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i >= charCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += Math.abs(val % 10) * pos; // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Short transform(final Short value) { // same digit masking as the Byte overload, narrowed to short
+    short val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    short ret = 0; // compound assignments below narrow to short, so a masked value beyond the short range wraps
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i >= charCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += Math.abs(val % 10) * pos; // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Integer transform(final Integer value) { // same digit masking as the Byte overload, in int arithmetic
+    int val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    int ret = 0;
+    int pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i >= charCount) { // mask this digit
+        ret += maskedNumber * pos;
+      } else { // retain this digit
+        ret += Math.abs(val % 10) * pos; // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+
+  @Override
+  Long transform(final Long value) { // same digit masking as the Byte overload, in long arithmetic
+    long val = value;
+
+    if(value < 0) {
+      val *= -1; // work on the magnitude; MIN_VALUE cannot be negated and stays negative
+    }
+
+    long ret = 0;
+    long pos = 1;
+    for(int i = 0; val != 0; i++) { // i counts digits starting at the least significant one
+      if(i >= charCount) { // mask this digit
+        ret += (maskedNumber * pos);
+      } else { // retain this digit
+        ret += (Math.abs(val % 10) * pos); // abs(): the remainder is negative when val is MIN_VALUE
+      }
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if(value < 0) {
+      ret *= -1; // restore the sign of the input
+    }
+
+    return ret;
+  }
+}
diff --git a/ql/src/test/queries/clientpositive/udf_mask.q b/ql/src/test/queries/clientpositive/udf_mask.q
new file mode 100644
index 0000000..82b8ee7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION mask;
+DESC FUNCTION EXTENDED mask;
+
+explain select mask('TestString-123', 'X', 'x', '0', '1');
+
+select mask('TestString-123', 'X', 'x', '0', ':'),
+       mask(cast('TestString-123' as varchar(24)), 'X', 'x', '0', ':'),
+       mask(cast('TestString-123' as char(24)), 'X', 'x', '0', ':'),
+       mask(cast(123 as tinyint), -1, -1, -1, -1, '5'),
+       mask(cast(12345 as smallint), -1, -1, -1, -1, '5'),
+       mask(cast(12345 as int), -1, -1, -1, -1, '5'),
+       mask(cast(12345 as bigint), -1, -1, -1, -1, '5'),
+       mask(cast('2016-04-20' as date), -1, -1, -1, -1, -1, 0, 0, 0);
diff --git a/ql/src/test/queries/clientpositive/udf_mask_first_n.q b/ql/src/test/queries/clientpositive/udf_mask_first_n.q
new file mode 100644
index 0000000..3cd3962
--- /dev/null
+++
b/ql/src/test/queries/clientpositive/udf_mask_first_n.q @@ -0,0 +1,13 @@ +DESCRIBE FUNCTION mask_first_n; +DESC FUNCTION EXTENDED mask_first_n; + +explain select mask_first_n('TestString-123', 4, 'X', 'x', '0', '1'); + +select mask_first_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0); diff --git a/ql/src/test/queries/clientpositive/udf_mask_hash.q b/ql/src/test/queries/clientpositive/udf_mask_hash.q new file mode 100644 index 0000000..698f6b3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_mask_hash.q @@ -0,0 +1,13 @@ +DESCRIBE FUNCTION mask_hash; +DESC FUNCTION EXTENDED mask_hash; + +explain select mask_hash('TestString-123'); + +select mask_hash('TestString-123'), + mask_hash(cast('TestString-123' as varchar(24))), + mask_hash(cast('TestString-123' as char(24))), + mask_hash(cast(123 as tinyint)), + mask_hash(cast(12345 as smallint)), + mask_hash(cast(12345 as int)), + mask_hash(cast(12345 as bigint)), + mask_hash(cast('2016-04-20' as date)); diff --git a/ql/src/test/queries/clientpositive/udf_mask_last_n.q b/ql/src/test/queries/clientpositive/udf_mask_last_n.q new file mode 100644 index 0000000..89eb05d --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_mask_last_n.q @@ -0,0 +1,13 @@ +DESCRIBE FUNCTION mask_last_n; +DESC FUNCTION EXTENDED mask_last_n; + +explain select mask_last_n('TestString-123', 4, 'X', 'x', '0', '1'); + +select mask_last_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + 
mask_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0); diff --git a/ql/src/test/queries/clientpositive/udf_mask_show_first_n.q b/ql/src/test/queries/clientpositive/udf_mask_show_first_n.q new file mode 100644 index 0000000..1425a82 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_mask_show_first_n.q @@ -0,0 +1,13 @@ +DESCRIBE FUNCTION mask_show_first_n; +DESC FUNCTION EXTENDED mask_show_first_n; + +explain select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', '1'); + +select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0); diff --git a/ql/src/test/queries/clientpositive/udf_mask_show_last_n.q b/ql/src/test/queries/clientpositive/udf_mask_show_last_n.q new file mode 100644 index 0000000..c4d15fb --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_mask_show_last_n.q @@ -0,0 +1,13 @@ +DESCRIBE FUNCTION mask_show_last_n; +DESC FUNCTION EXTENDED mask_show_last_n; + +explain select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', '1'); + +select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + 
mask_show_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0); diff --git a/ql/src/test/results/beelinepositive/show_functions.q.out b/ql/src/test/results/beelinepositive/show_functions.q.out index d7f706b..4f3ec40 100644 --- a/ql/src/test/results/beelinepositive/show_functions.q.out +++ b/ql/src/test/results/beelinepositive/show_functions.q.out @@ -96,6 +96,12 @@ Saving all output to "!!{outputDirectory}!!/show_functions.q.raw". Enter "record 'map' 'map_keys' 'map_values' +'mask' +'mask_first_n' +'mask_hash' +'mask_last_n' +'mask_show_first_n' +'mask_show_last_n' 'max' 'min' 'minute' diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index 3cddcce..5c8b982 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -127,6 +127,12 @@ ltrim map map_keys map_values +mask +mask_first_n +mask_hash +mask_last_n +mask_show_first_n +mask_show_last_n matchpath max md5 @@ -340,6 +346,12 @@ POSTHOOK: type: SHOWFUNCTIONS map map_keys map_values +mask +mask_first_n +mask_hash +mask_last_n +mask_show_first_n +mask_show_last_n matchpath max md5 diff --git a/ql/src/test/results/clientpositive/udf_mask.q.out b/ql/src/test/results/clientpositive/udf_mask.q.out new file mode 100644 index 0000000..cbafaee --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_mask.q.out @@ -0,0 +1,71 @@ +PREHOOK: query: DESCRIBE FUNCTION mask +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION mask +POSTHOOK: type: DESCFUNCTION +masks the given value +PREHOOK: query: DESC FUNCTION 
EXTENDED mask +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED mask +POSTHOOK: type: DESCFUNCTION +masks the given value +Examples: + mask(ccn) + mask(ccn, 'X', 'x', '0') + mask(ccn, 'x', 'x', 'x') + Arguments: + mask(value, upperChar, lowerChar, digitChar, otherChar, numberChar, dayValue, monthValue, yearValue) + value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR, DATE + upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X' + lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x' + digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n' + otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1 + numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1' + dayValue - value to replace day field in a date with. Specify -1 to retain original value. Valid values: 1-31. Default value: 1 + monthValue - value to replace month field in a date with. Specify -1 to retain original value. Valid values: 0-11. Default value: 0 + yearValue - value to replace year field in a date with. Specify -1 to retain original value. 
Default value: 0 + +PREHOOK: query: explain select mask('TestString-123', 'X', 'x', '0', '1') +PREHOOK: type: QUERY +POSTHOOK: query: explain select mask('TestString-123', 'X', 'x', '0', '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'XxxxXxxxxx1000' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select mask('TestString-123', 'X', 'x', '0', ':'), + mask(cast('TestString-123' as varchar(24)), 'X', 'x', '0', ':'), + mask(cast('TestString-123' as char(24)), 'X', 'x', '0', ':'), + mask(cast(123 as tinyint), -1, -1, -1, -1, '5'), + mask(cast(12345 as smallint), -1, -1, -1, -1, '5'), + mask(cast(12345 as int), -1, -1, -1, -1, '5'), + mask(cast(12345 as bigint), -1, -1, -1, -1, '5'), + mask(cast('2016-04-20' as date), -1, -1, -1, -1, -1, 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select mask('TestString-123', 'X', 'x', '0', ':'), + mask(cast('TestString-123' as varchar(24)), 'X', 'x', '0', ':'), + mask(cast('TestString-123' as char(24)), 'X', 'x', '0', ':'), + mask(cast(123 as tinyint), -1, -1, -1, -1, '5'), + mask(cast(12345 as smallint), -1, -1, -1, -1, '5'), + mask(cast(12345 as int), -1, -1, -1, -1, '5'), + mask(cast(12345 as bigint), -1, -1, -1, -1, '5'), + mask(cast('2016-04-20' as date), -1, -1, -1, -1, -1, 0, 0, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +XxxxXxxxxx:000 XxxxXxxxxx:000 XxxxXxxxxx:000:::::::::: 43 -9981 55555 55555 1900-01-01 diff --git a/ql/src/test/results/clientpositive/udf_mask_first_n.q.out 
b/ql/src/test/results/clientpositive/udf_mask_first_n.q.out new file mode 100644 index 0000000..988cf70 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_mask_first_n.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: DESCRIBE FUNCTION mask_first_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION mask_first_n +POSTHOOK: type: DESCFUNCTION +masks the first n characters of the value +PREHOOK: query: DESC FUNCTION EXTENDED mask_first_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED mask_first_n +POSTHOOK: type: DESCFUNCTION +masks the first n characters of the value +Examples: + mask_first_n(ccn, 8) + mask_first_n(ccn, 8, 'x', 'x', 'x') + Arguments: + mask(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar) + value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR + charCount - number of characters. Default value: 4 + upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X' + lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x' + digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n' + otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1 + numberChar - character to replace digits in a number with. Valid values: 0-9. 
Default value: '1' + +PREHOOK: query: explain select mask_first_n('TestString-123', 4, 'X', 'x', '0', '1') +PREHOOK: type: QUERY +POSTHOOK: query: explain select mask_first_n('TestString-123', 4, 'X', 'x', '0', '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'XxxxString-123' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select mask_first_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select mask_first_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A 
masked pattern was here #### +XxxxString-123 XxxxString-123 XxxxString-123 43 -9981 55555 55555 1900-01-01 diff --git a/ql/src/test/results/clientpositive/udf_mask_hash.q.out b/ql/src/test/results/clientpositive/udf_mask_hash.q.out new file mode 100644 index 0000000..9fc34bb --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_mask_hash.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: DESCRIBE FUNCTION mask_hash +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION mask_hash +POSTHOOK: type: DESCFUNCTION +returns hash of the given value +PREHOOK: query: DESC FUNCTION EXTENDED mask_hash +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED mask_hash +POSTHOOK: type: DESCFUNCTION +returns hash of the given value +Examples: + mask_hash(value) + Arguments: + value - value to mask. Supported types: STRING, VARCHAR, CHAR +PREHOOK: query: explain select mask_hash('TestString-123') +PREHOOK: type: QUERY +POSTHOOK: query: explain select mask_hash('TestString-123') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'dd78d68ad1b23bde126812482dd70ac6' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select mask_hash('TestString-123'), + mask_hash(cast('TestString-123' as varchar(24))), + mask_hash(cast('TestString-123' as char(24))), + mask_hash(cast(123 as tinyint)), + mask_hash(cast(12345 as smallint)), + mask_hash(cast(12345 as int)), + mask_hash(cast(12345 as bigint)), + mask_hash(cast('2016-04-20' as date)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select mask_hash('TestString-123'), + 
mask_hash(cast('TestString-123' as varchar(24))), + mask_hash(cast('TestString-123' as char(24))), + mask_hash(cast(123 as tinyint)), + mask_hash(cast(12345 as smallint)), + mask_hash(cast(12345 as int)), + mask_hash(cast(12345 as bigint)), + mask_hash(cast('2016-04-20' as date)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +dd78d68ad1b23bde126812482dd70ac6 dd78d68ad1b23bde126812482dd70ac6 835735ba20f1297683efca69fabd0fba NULL NULL NULL NULL NULL diff --git a/ql/src/test/results/clientpositive/udf_mask_last_n.q.out b/ql/src/test/results/clientpositive/udf_mask_last_n.q.out new file mode 100644 index 0000000..ae75d51 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_mask_last_n.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: DESCRIBE FUNCTION mask_last_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION mask_last_n +POSTHOOK: type: DESCFUNCTION +masks the last n characters of the value +PREHOOK: query: DESC FUNCTION EXTENDED mask_last_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED mask_last_n +POSTHOOK: type: DESCFUNCTION +masks the last n characters of the value +Examples: + mask_last_n(ccn, 8) + mask_last_n(ccn, 8, 'x', 'x', 'x') + Arguments: + mask_last_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar) + value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR + charCount - number of characters. Default value: 4 + upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X' + lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x' + digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n' + otherChar - character to replace all other characters with. Specify -1 to retain original character. 
Default value: -1 + numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1' + +PREHOOK: query: explain select mask_last_n('TestString-123', 4, 'X', 'x', '0', '1') +PREHOOK: type: QUERY +POSTHOOK: query: explain select mask_last_n('TestString-123', 4, 'X', 'x', '0', '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'TestString1000' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select mask_last_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select mask_last_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +TestString:000 TestString:000 TestString-123 :::: 43 15555 15555 15555 1900-01-01 diff --git a/ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out b/ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out new file mode 100644 index 0000000..d8ada97 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: DESCRIBE FUNCTION mask_show_first_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION mask_show_first_n +POSTHOOK: type: DESCFUNCTION +masks all but first n characters of the value +PREHOOK: query: DESC FUNCTION EXTENDED mask_show_first_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED mask_show_first_n +POSTHOOK: type: DESCFUNCTION +masks all but first n characters of the value +Examples: + mask_show_first_n(ccn, 8) + mask_show_first_n(ccn, 8, 'x', 'x', 'x') + Arguments: + mask_show_first_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar) + value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR + charCount - number of characters. Default value: 4 + upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X' + lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x' + digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n' + otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1 + numberChar - character to replace digits in a number with. Valid values: 0-9. 
Default value: '1' + +PREHOOK: query: explain select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', '1') +PREHOOK: type: QUERY +POSTHOOK: query: explain select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'TestXxxxxx1000' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_show_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_show_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 
0, 0, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +TestXxxxxx:000 TestXxxxxx:000 TestXxxxxx:000:::::::::: 123 12345 12345 12345 1900-01-01 diff --git a/ql/src/test/results/clientpositive/udf_mask_show_last_n.q.out b/ql/src/test/results/clientpositive/udf_mask_show_last_n.q.out new file mode 100644 index 0000000..3bf2e62 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_mask_show_last_n.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: DESCRIBE FUNCTION mask_show_last_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION mask_show_last_n +POSTHOOK: type: DESCFUNCTION +masks all but last n characters of the value +PREHOOK: query: DESC FUNCTION EXTENDED mask_show_last_n +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED mask_show_last_n +POSTHOOK: type: DESCFUNCTION +masks all but last n characters of the value +Examples: + mask_show_last_n(ccn, 8) + mask_show_last_n(ccn, 8, 'x', 'x', 'x') + Arguments: + mask_show_last_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar) + value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR + charCount - number of characters. Default value: 4 + upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X' + lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x' + digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n' + otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1 + numberChar - character to replace digits in a number with. Valid values: 0-9. 
Default value: '1' + +PREHOOK: query: explain select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', '1') +PREHOOK: type: QUERY +POSTHOOK: query: explain select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'XxxxXxxxxx-123' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'), + mask_show_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'), + mask_show_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +XxxxXxxxxx-123 XxxxXxxxxx-123 XxxxXxxxxx:000:::::: 123 -13191 52345 52345 1900-01-01 -- 2.6.4 (Apple Git-63)