diff --git ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 7e888bc..c9260ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -27,13 +27,17 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.udf.UDFLike; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; @@ -48,7 +52,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -358,7 +364,19 @@ private void parse(ExprNodeDesc expression) { // get the kind of expression ExprNodeGenericFuncDesc expr = 
(ExprNodeGenericFuncDesc) expression; - Class op = expr.getGenericUDF().getClass(); + GenericUDF udf = expr.getGenericUDF(); + Class<?> op = udf.getClass(); + if (op == GenericUDFBridge.class) { + op = ((GenericUDFBridge) udf).getUdfClass(); + if (op == UDFLike.class) { + // convert a LIKE pattern into a SARGable expression (column >= literal prefix) + expr = UDFLike.searchArgument(expr); + if (expr != null) { + op = expr.getGenericUDF().getClass(); + assert op == GenericUDFOPEqual.class || op == GenericUDFOPEqualOrGreaterThan.class : "LIKE is always >= or ="; + } + } + } // handle the logical operators if (op == GenericUDFOPOr.class) { @@ -403,9 +421,8 @@ private void parse(ExprNodeDesc expression) { builder.startNot(); createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0); builder.end(); - - // otherwise, we didn't understand it, so mark it maybe } else { + // otherwise, we didn't understand it, so mark it maybe builder.literal(SearchArgument.TruthValue.YES_NO_NULL); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index e291a48..79e8fd6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -32,9 +32,13 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -598,4 +602,28 @@ public static ExprNodeDesc flattenExpr(ExprNodeDesc source) { // constant or null expr, just return return source; } + + /** + * Convert an expr node desc into its constant String representation. + * Returns null if the expr is not a constant, if it is a non-string type, + * or if the constant value itself is null. + * @param expr the expression to convert + * @return constant string expression or NULL + */ + public static String toConstantString(ExprNodeDesc expr) { + TypeInfo constType = expr.getTypeInfo(); + if (constType.getCategory() == Category.PRIMITIVE + && PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) constType) + .getPrimitiveCategory()) == PrimitiveGrouping.STRING_GROUP) { + ObjectInspector inspector = expr.getWritableObjectInspector(); + // guard the cast: the contract above promises null, not a ClassCastException, for non-constants + if (inspector instanceof ConstantObjectInspector) { + Object constValue = ((ConstantObjectInspector) inspector).getWritableConstantValue(); + if (constValue != null) { + return constValue.toString(); + } + } + } + return null; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java index 85d0363..8c008b7 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java @@ -18,13 +18,28 @@ package org.apache.hadoop.hive.ql.udf; +import java.util.Arrays; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.annotation.Nullable; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import 
org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; @@ -202,6 +217,9 @@ public BooleanWritable evaluate(Text s, Text likePattern) { case BEGIN: endS = simplePattern.getLength(); break; + case MIDDLE: + // find() always looks in the middle mode. + break; case END: startS = endS - simplePattern.getLength(); break; @@ -217,4 +235,85 @@ public BooleanWritable evaluate(Text s, Text likePattern) { return result; } + /** + * Returns the longest literal prefix of a LIKE pattern: the characters that precede the first + * unescaped wildcard ("_" or "%"); the escapes \_ and \% are kept as the literal "_" and "%". 
+ * + * "ab%" will be "ab" + * "a_b" will be "a" + * "abc%q%q_" will be "abc" + * "%abc" will be null + * null will be null + * + * @param likePattern the LIKE pattern, may be null + * @return the literal prefix, or null if the pattern is null, empty or starts with a wildcard + */ + private static String minPrefixPattern(@Nullable String likePattern) { + if (likePattern == null) { + return null; + } + int length = likePattern.length(); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i++) { + // Make a special case for "\\_" and "\\%" + char n = likePattern.charAt(i); + if (n == '\\' + && i + 1 < length + && (likePattern.charAt(i + 1) == '_' || likePattern.charAt(i + 1) == '%')) { + sb.append(likePattern.charAt(i + 1)); + i++; + continue; + } + if (n == '_' || n == '%') { + // wildcard, end prefix + break; + } else { + sb.append(n); + } + } + if (sb.length() > 0) { + return sb.toString(); + } + return null; + } + + /** + * Rewrites a bridged {@code column LIKE 'pattern'} call into {@code column >= prefix}, where prefix is + * the literal text before the first wildcard. The result is weaker than the LIKE (necessary, not + * sufficient), so it is only suitable for pruning, not as a replacement filter. + * + * @param expr a GenericUDFBridge call wrapping UDFLike + * @return the >= expression, or null unless expr is (string-group column) LIKE (string constant + * with a non-empty literal prefix) + * @throws IllegalArgumentException if expr is not a bridged UDFLike call + */ + public static ExprNodeGenericFuncDesc searchArgument(ExprNodeGenericFuncDesc expr) { + GenericUDF udf = expr.getGenericUDF(); + boolean isBridge = (udf instanceof GenericUDFBridge); + + if (!isBridge || ((GenericUDFBridge) udf).getUdfClass() != UDFLike.class) { + // is not a bridge UDF or the bridged UDF isn't UDFLike + throw new IllegalArgumentException("The argument is not of the right UDF: needs UDFLike"); + } + + List<ExprNodeDesc> args = expr.getChildren(); + if (args.size() != 2) { + return null; + } + ExprNodeDesc column = args.get(0); + ExprNodeDesc pattern = args.get(1); + if (column instanceof ExprNodeColumnDesc && pattern instanceof ExprNodeConstantDesc) { + String minPrefix = minPrefixPattern(ExprNodeDescUtils.toConstantString(pattern)); + PrimitiveGrouping colGroup = TypeInfoUtils.getPrimitiveGrouping(column.getTypeInfo()); + if (colGroup == PrimitiveGrouping.STRING_GROUP && minPrefix != null) { + // >= "min-prefix" + ExprNodeConstantDesc prefixNode = new ExprNodeConstantDesc(pattern.getTypeInfo(), minPrefix); + return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(new ExprNodeDesc[] { 
column, + prefixNode })); + } + } + return null; + } + } diff --git ql/src/test/queries/clientpositive/orc_ppd_like.q ql/src/test/queries/clientpositive/orc_ppd_like.q new file mode 100644 index 0000000..850b524 --- /dev/null +++ ql/src/test/queries/clientpositive/orc_ppd_like.q @@ -0,0 +1,354 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; +SET hive.optimize.ppd=true; +SET hive.optimize.index.filter=true; +SET hive.tez.exec.print.summary=true; +SET hive.exec.reducers.bytes.per.reducer=4096; + +-- to verify the results match +-- grep "^[0-9]" ./ql/src/test/results/clientpositive/orc_ppd_like.q.out | sort -nr | uniq -c +-- this should have no number > 1, which indicates a PPD error + +DROP TABLE IF EXISTS lineitem_raw; +CREATE TABLE lineitem_raw (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_raw; + +CREATE TEMPORARY TABLE ll +STORED AS ORC TBLPROPERTIES("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +as SELECT * from lineitem_raw +SORT BY l_shipdate; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%1998') ss2 +) h; + + +select 
count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%-12-%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '199_%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998-02_29') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%31') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '2020%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1970%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%44') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%') ss2 +) h; + + +select count (distinct h_key) 
from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%1998') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%-12-%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '199_%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998-02_29') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%31') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '2020%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1970%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%44') ss2 +) h; + + +select count (distinct 
h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%1998') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%-12-%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '199_%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998-02_29') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%31') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '2020%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1970%') ss2 +) h; + + 
+select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%44') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'DELIVER%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'COLLECT%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%RETURN') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%AIR') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'AIR%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'SHIP%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%BACK%') ss2 +) h; + + +select count (distinct 
h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'TAKE%RETURN') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%COD') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'DELIVER%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'COLLECT%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%RETURN') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%AIR') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'AIR%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'SHIP%') ss2 +) 
h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%BACK%') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'TAKE%RETURN') ss2 +) h; + + +select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%COD') ss2 +) h; + diff --git ql/src/test/results/clientpositive/orc_ppd_like.q.out ql/src/test/results/clientpositive/orc_ppd_like.q.out new file mode 100644 index 0000000..d923230 --- /dev/null +++ ql/src/test/results/clientpositive/orc_ppd_like.q.out @@ -0,0 +1,919 @@ +PREHOOK: query: DROP TABLE IF EXISTS lineitem_raw +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS lineitem_raw +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem_raw (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_raw +POSTHOOK: query: CREATE TABLE lineitem_raw (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE 
STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_raw +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_raw +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_raw +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_raw +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_raw +PREHOOK: query: CREATE TEMPORARY TABLE ll STORED AS ORC as SELECT * from lineitem_raw SORT BY l_shipdate +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@lineitem_raw +PREHOOK: Output: database:default +PREHOOK: Output: default@ll +POSTHOOK: query: CREATE TEMPORARY TABLE ll STORED AS ORC as SELECT * from lineitem_raw SORT BY l_shipdate +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ll +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select 
sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%1998') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%1998') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%-12-%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%-12-%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '199_%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '199_%') 
ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '199_%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998-02_29') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998-02_29') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%31') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%31') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key 
from (select * from lineitem_raw where l_shipdate like '2020%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '2020%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1970%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1970%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%44') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%44') ss2 +) h +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%1998') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%1998') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%-12-%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw 
+PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%-12-%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '199_%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '199_%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998-02_29') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998-02_29') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### 
A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%31') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%31') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '2020%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '2020%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1970%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: 
select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1970%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%44') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%44') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) 
as h_key from (select * from ll where L_RECEIPTDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%1998') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%1998') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%-12-%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%-12-%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '199_%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where 
L_RECEIPTDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '199_%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998-02_29') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998-02_29') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%31') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%31') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE 
like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '2020%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '2020%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1970%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1970%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%44') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as 
h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%44') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'DELIVER%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'DELIVER%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'COLLECT%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'COLLECT%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where 
L_SHIPMODE like '%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%AIR') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%AIR') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'AIR%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'AIR%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'SHIP%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'SHIP%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%BACK%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%BACK%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'TAKE%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A 
masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'TAKE%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%COD') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%COD') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'DELIVER%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'DELIVER%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 
+PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'COLLECT%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'COLLECT%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%AIR') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: 
select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%AIR') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'AIR%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'AIR%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'SHIP%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'SHIP%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( 
+select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%BACK%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%BACK%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'TAKE%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'TAKE%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%COD') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select 
sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%COD') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 diff --git ql/src/test/results/clientpositive/tez/orc_ppd_like.q.out ql/src/test/results/clientpositive/tez/orc_ppd_like.q.out new file mode 100644 index 0000000..3cc70f3 --- /dev/null +++ ql/src/test/results/clientpositive/tez/orc_ppd_like.q.out @@ -0,0 +1,933 @@ +PREHOOK: query: -- to verify the results match +-- grep "^[0-9]" ./ql/src/test/results/clientpositive/orc_ppd_like.q.out | sort -nr | uniq -c +-- this should have no number > 1, which indicates a PPD error + +DROP TABLE IF EXISTS lineitem_raw +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- to verify the results match +-- grep "^[0-9]" ./ql/src/test/results/clientpositive/orc_ppd_like.q.out | sort -nr | uniq -c +-- this should have no number > 1, which indicates a PPD error + +DROP TABLE IF EXISTS lineitem_raw +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem_raw (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_raw +POSTHOOK: query: CREATE TABLE lineitem_raw (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT 
STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_raw +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_raw +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_raw +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_raw +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_raw +PREHOOK: query: CREATE TEMPORARY TABLE ll +STORED AS ORC TBLPROPERTIES("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +as SELECT * from lineitem_raw +SORT BY l_shipdate +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@lineitem_raw +PREHOOK: Output: database:default +PREHOOK: Output: default@ll +POSTHOOK: query: CREATE TEMPORARY TABLE ll +STORED AS ORC TBLPROPERTIES("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +as SELECT * from lineitem_raw +SORT BY l_shipdate +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ll +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: 
default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%1998') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%1998') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%-12-%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%-12-%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '199_%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select 
count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '199_%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998-02_29') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998-02_29') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%31') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1998%31') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select 
sum(hash(*)) as h_key from (select * from ll where l_shipdate like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '2020%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '2020%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1970%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '1970%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%44') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where l_shipdate like '%44') ss1 
+UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where l_shipdate like '%44') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%1998') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%1998') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * 
from lineitem_raw where L_COMMITDATE like '%-12-%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%-12-%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '199_%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '199_%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998-02_29') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like 
'1998-02_29') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%31') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1998%31') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '2020%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '2020%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1970%') ss2 +) h +PREHOOK: type: QUERY 
+PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '1970%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%44') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_COMMITDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_COMMITDATE like '%44') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: 
default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%1998') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%1998') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%1998') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%-12-%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%-12-%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%-12-%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '199_%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here 
#### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '199_%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '199_%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998-02_29') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998-02_29') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998-02_29') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%31') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1998%31') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1998%31') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: 
query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '2020%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '2020%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '2020%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1970%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '1970%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '1970%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%44') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select 
sum(hash(*)) as h_key from (select * from ll where L_RECEIPTDATE like '%44') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_RECEIPTDATE like '%44') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'DELIVER%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'DELIVER%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'COLLECT%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'COLLECT%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll 
where L_SHIPMODE like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%AIR') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%AIR') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'AIR%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key 
from (select * from lineitem_raw where L_SHIPMODE like 'AIR%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'SHIP%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'SHIP%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%BACK%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%BACK%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 
'TAKE%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like 'TAKE%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%COD') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPMODE like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPMODE like '%COD') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'DELIVER%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'DELIVER%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'DELIVER%') ss2 +) h +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'COLLECT%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'COLLECT%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'COLLECT%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%AIR') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: 
Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%AIR') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%AIR') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'AIR%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'AIR%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'AIR%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'SHIP%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'SHIP%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'SHIP%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: 
Input: default@ll +#### A masked pattern was here #### +0 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%BACK%') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%BACK%') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%BACK%') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'TAKE%RETURN') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like 'TAKE%RETURN') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like 'TAKE%RETURN') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 +PREHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%COD') ss2 +) h +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_raw +PREHOOK: Input: 
default@ll +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct h_key) from ( +select sum(hash(*)) as h_key from (select * from ll where L_SHIPINSTRUCT like '%COD') ss1 +UNION ALL +select sum(hash(*) )as h_key from (select * from lineitem_raw where L_SHIPINSTRUCT like '%COD') ss2 +) h +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_raw +POSTHOOK: Input: default@ll +#### A masked pattern was here #### +1 diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java index a4323d1..341620d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; /** @@ -807,4 +808,17 @@ public static int getCharacterLengthForType(PrimitiveTypeInfo typeInfo) { return 0; } } + + /** + * Return the primitive grouping of the given type info. + * @param ti the type info to inspect + * @return the primitive grouping if the category of ti is PRIMITIVE, otherwise null + */ + public static PrimitiveGrouping getPrimitiveGrouping(TypeInfo ti) { + if (ti.getCategory() == Category.PRIMITIVE) { + return PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) ti) + .getPrimitiveCategory()); + } + return null; + } }