diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e92466f..aa7647b 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2592,6 +2592,13 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", false, "This flag should be set to true to enable vectorizing using row deserialize.\n" + "The default value is false."), + HIVE_VECTOR_ADAPTOR_USAGE_MODE("hive.vectorized.adaptor.usage.mode", "all", new StringSet("none", "chosen", "all"), + "Specifies the extent to which the VectorUDFAdaptor will be used for UDFs that do not have a corresponding vectorized class.\n" + + "0. none : disable any usage of VectorUDFAdaptor\n" + + "1. chosen : use VectorUDFAdaptor for a small set of UDFs that were chosen for good performance\n" + + "2. all : use VectorUDFAdaptor for all UDFs" + ), + HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " + "partition operations including but not limited to insert, such as alter, describe etc."), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 6e53526..2605203 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -767,7 +767,8 @@ public VectorGroupByOperator(CompilationOpContext ctx, isVectorOutput = desc.getVectorDesc().isVectorOutput(); - vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(), + /* vContextEnvironment */ vContext); } /** Kryo ctor. */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index 902a183..bcde25f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -88,7 +88,8 @@ public VectorMapJoinBaseOperator(CompilationOpContext ctx, noOuterJoin = desc.isNoOuterJoin(); // We are making a new output vectorized row batch. - vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(), + /* vContextEnvironment */ vContext); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 59153c8..80b0a14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -126,7 +126,8 @@ public VectorSMBMapJoinOperator(CompilationOpContext ctx, bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); // We are making a new output vectorized row batch.
- vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(), + /* vContextEnvironment */ vContext); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index c0b9a4c..fca844a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionInfo; @@ -138,6 +139,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.DateUtils; +import com.google.common.annotations.VisibleForTesting; + /** * Context class for vectorization execution. @@ -165,9 +168,32 @@ // private final Map columnMap; private int firstOutputColumnIndex; + private enum HiveVectorAdaptorUsageMode { + NONE, + CHOSEN, + ALL; + + public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { + String string = HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE); + return valueOf(string.toUpperCase()); + } + } + + private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; + + private void setHiveConfVars(HiveConf hiveConf) { + hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf); + } + + private void copyHiveConfVars(VectorizationContext vContextEnvironment) { + hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode; + } + // Convenient constructor for initial batch creation takes // a list of column names and maps them to 0..n-1 indices. - public VectorizationContext(String contextName, List initialColumnNames) { + public VectorizationContext(String contextName, List initialColumnNames, + HiveConf hiveConf) { this.contextName = contextName; level = 0; this.initialColumnNames = initialColumnNames; @@ -183,11 +209,26 @@ public VectorizationContext(String contextName, List initialColumnNames) this.ocm = new OutputColumnManager(firstOutputColumnIndex); this.firstOutputColumnIndex = firstOutputColumnIndex; vMap = new VectorExpressionDescriptor(); + + if (hiveConf != null) { + setHiveConfVars(hiveConf); + } + } + + public VectorizationContext(String contextName, List initialColumnNames, + VectorizationContext vContextEnvironment) { + this(contextName, initialColumnNames, (HiveConf) null); + copyHiveConfVars(vContextEnvironment); + } + + @VisibleForTesting + public VectorizationContext(String contextName, List initialColumnNames) { + this(contextName, initialColumnNames, (HiveConf) null); } // Constructor to use with the individual addInitialColumn method // followed by a call to finishedAddingInitialColumns.
- public VectorizationContext(String contextName) { + public VectorizationContext(String contextName, HiveConf hiveConf) { this.contextName = contextName; level = 0; initialColumnNames = new ArrayList(); @@ -197,6 +238,16 @@ public VectorizationContext(String contextName) { this.ocm = new OutputColumnManager(0); this.firstOutputColumnIndex = 0; vMap = new VectorExpressionDescriptor(); + + if (hiveConf != null) { + setHiveConfVars(hiveConf); + } + + } + + @VisibleForTesting + public VectorizationContext(String contextName) { + this(contextName, (HiveConf) null); } // Constructor useful for making a projection vectorization context. @@ -213,6 +264,8 @@ public VectorizationContext(String contextName, VectorizationContext vContext) { this.ocm = vContext.ocm; this.firstOutputColumnIndex = vContext.firstOutputColumnIndex; vMap = new VectorExpressionDescriptor(); + + copyHiveConfVars(vContext); } // Add an initial column to a vectorization context when @@ -491,10 +544,46 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress ve = getGenericUdfVectorExpression(expr.getGenericUDF(), childExpressions, mode, exprDesc.getTypeInfo()); if (ve == null) { - /* - * Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor. - */ - ve = getCustomUDFExpression(expr, mode); + // Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor + // when configured. + // + // NOTE: We assume that if hiveVectorAdaptorUsageMode has not been set, it is because we are + // executing a test that didn't create a HiveConf, etc. No usage of VectorUDFAdaptor in + // that case. + if (hiveVectorAdaptorUsageMode != null) { + switch (hiveVectorAdaptorUsageMode) { + case NONE: + // No VectorUDFAdaptor usage. + throw new HiveException( + "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() + + " because hive.vectorized.adaptor.usage.mode=none"); + case CHOSEN: + if (isNonVectorizedPathUDF(expr, mode)) { + ve = getCustomUDFExpression(expr, mode); + } else { + throw new HiveException( + "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() + + " because hive.vectorized.adaptor.usage.mode=chosen" + + " and the UDF wasn't one of the chosen ones"); + } + break; + case ALL: + if (LOG.isDebugEnabled()) { + LOG.debug("We will try to use the VectorUDFAdaptor for " + exprDesc.toString() + + " because hive.vectorized.adaptor.usage.mode=all"); + } + ve = getCustomUDFExpression(expr, mode); + break; + default: + throw new RuntimeException("Unknown hive vector adaptor usage mode " + + hiveVectorAdaptorUsageMode.name()); + } + if (ve == null) { + throw new HiveException( + "Unable to vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() + + " even with the VectorUDFAdaptor"); + } + } } } } else if (exprDesc instanceof ExprNodeConstantDesc) { @@ -778,6 +867,64 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException return genericUdf; } + /* Return true if this is one of a small set of functions for which + * it is significantly easier to use the old code path in vectorized + * mode instead of implementing a new, optimized VectorExpression. + * + * Depending on performance requirements and frequency of use, these + * may be implemented in the future with an optimized VectorExpression.
+ */ + public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr, + VectorExpressionDescriptor.Mode mode) { + GenericUDF gudf = expr.getGenericUDF(); + if (gudf instanceof GenericUDFBridge) { + GenericUDFBridge bridge = (GenericUDFBridge) gudf; + Class udfClass = bridge.getUdfClass(); + if (udfClass.equals(UDFHex.class) + || udfClass.equals(UDFRegExpExtract.class) + || udfClass.equals(UDFRegExpReplace.class) + || udfClass.equals(UDFConv.class) + || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr)) + || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) + || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) + || udfClass.equals(UDFToString.class) && + (arg0Type(expr).equals("timestamp") + || arg0Type(expr).equals("double") + || arg0Type(expr).equals("float"))) { + return true; + } + } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) + + /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because + * of their complexity and generality. In the future, variations of these + * can be optimized to run faster for the vectorized code path. For example, + * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END + * is an example of a GenericUDFCase that has all constant arguments + * except for the first argument. This is probably a common case and a + * good candidate for a fast, special-purpose VectorExpression. Then + * the UDF Adaptor code path could be used as a catch-all for + * non-optimized general cases. + */ + || gudf instanceof GenericUDFCase + || gudf instanceof GenericUDFWhen) { + return true; + } else if (gudf instanceof GenericUDFToChar && + (arg0Type(expr).equals("timestamp") + || arg0Type(expr).equals("double") + || arg0Type(expr).equals("float"))) { + return true; + } else if (gudf instanceof GenericUDFToVarchar && + (arg0Type(expr).equals("timestamp") + || arg0Type(expr).equals("double") + || arg0Type(expr).equals("float"))) { + return true; + } else if (gudf instanceof GenericUDFBetween && (mode == VectorExpressionDescriptor.Mode.PROJECTION)) { + // between has 4 args here, but can be vectorized like this + return true; + } + return false; + } + public static boolean isCastToIntFamily(Class udfClass) { return udfClass.equals(UDFToByte.class) || udfClass.equals(UDFToShort.class) diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 9802afc..b760988 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1293,7 +1293,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (op.getParentOperators().size() == 0) { LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + vectorTaskColumnInfo.allColumnNames.toString()); - vContext = new VectorizationContext("__Reduce_Shuffle__", vectorTaskColumnInfo.allColumnNames); + vContext = new VectorizationContext("__Reduce_Shuffle__", vectorTaskColumnInfo.allColumnNames, hiveConf); taskVectorizationContext = vContext; saveRootVectorOp = true; @@ -1332,8 +1332,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } private static class ValidatorVectorizationContext extends VectorizationContext { - private ValidatorVectorizationContext() { - super("No Name"); + private ValidatorVectorizationContext(HiveConf hiveConf) { + super("No Name", hiveConf); } 
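// ---------------------------------------------------------------------------
// Editor's note: a hedged, self-contained sketch (not part of the patch) that
// condenses the adaptor gating getVectorExpression() now performs in
// VectorizationContext above. The AdaptorUsageModeDemo class name and the
// shouldUseAdaptor() helper are hypothetical; the enum values, config key,
// and decision rules come from the patch itself.
import org.apache.hadoop.hive.conf.HiveConf;

public class AdaptorUsageModeDemo {
  enum Mode { NONE, CHOSEN, ALL }

  // Same lookup as HiveVectorAdaptorUsageMode.getHiveConfValue(HiveConf).
  static Mode resolve(HiveConf conf) {
    String s = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE);
    return Mode.valueOf(s.toUpperCase());
  }

  // Mirrors the switch in getVectorExpression(): NONE fails vectorization
  // outright, CHOSEN admits only the isNonVectorizedPathUDF() set, ALL always
  // falls back to the VectorUDFAdaptor.
  static boolean shouldUseAdaptor(Mode mode, boolean isChosenUdf) {
    switch (mode) {
      case NONE:   return false;
      case CHOSEN: return isChosenUdf;
      case ALL:    return true;
      default:     throw new IllegalStateException("Unknown mode " + mode);
    }
  }

  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setVar(HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE, "chosen");
    System.out.println(shouldUseAdaptor(resolve(conf), true));   // true
    System.out.println(shouldUseAdaptor(resolve(conf), false));  // false
  }
}
// ---------------------------------------------------------------------------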
@Override @@ -1851,7 +1851,7 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode return false; } try { - VectorizationContext vc = new ValidatorVectorizationContext(); + VectorizationContext vc = new ValidatorVectorizationContext(hiveConf); if (vc.getVectorExpression(desc, mode) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. LOG.info("getVectorExpression returned null"); @@ -1905,7 +1905,7 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA } // See if we can vectorize the aggregation. - VectorizationContext vc = new ValidatorVectorizationContext(); + VectorizationContext vc = new ValidatorVectorizationContext(hiveConf); VectorAggregateExpression vectorAggrExpr; try { vectorAggrExpr = vc.getAggregatorExpression(aggDesc); @@ -1944,7 +1944,8 @@ public static boolean validateDataType(String type, VectorExpressionDescriptor.M private VectorizationContext getVectorizationContext(String contextName, VectorTaskColumnInfo vectorTaskColumnInfo) { - VectorizationContext vContext = new VectorizationContext(contextName, vectorTaskColumnInfo.allColumnNames); + VectorizationContext vContext = + new VectorizationContext(contextName, vectorTaskColumnInfo.allColumnNames, hiveConf); return vContext; } diff --git ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q new file mode 100644 index 0000000..cef4e4c --- /dev/null +++ ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q @@ -0,0 +1,177 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +SET hive.auto.convert.join=true; + +-- SORT_QUERY_RESULTS + +drop table varchar_udf_1; + +create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC; +insert overwrite table varchar_udf_1 + select key, value, key, value from src where key = '238' limit 1; + +DROP TABLE IF EXISTS DECIMAL_UDF_txt; +DROP TABLE IF EXISTS DECIMAL_UDF; + +CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt; + +CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int) +STORED AS ORC; + +INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt; + +drop table if exists count_case_groupby; + +create table count_case_groupby (key string, bool boolean) STORED AS orc; +insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL); + +set hive.vectorized.adaptor.usage.mode=none; + +explain +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1; + +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1; + +explain +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1; + +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1; + +explain +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1; + 
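-- Editor's note (hedged; an illustration, not part of the test): with
-- mode=none, none of the plans above report "Execution mode: vectorized",
-- since these UDFs lack dedicated VectorExpressions and the adaptor fallback
-- is disabled. With mode=chosen (set further below), regexp_extract and
-- regexp_replace vectorize through the VectorUDFAdaptor because they are in
-- the chosen set (isNonVectorizedPathUDF), while the plain regexp operator
-- still does not. A per-session illustration:
--
--   set hive.vectorized.adaptor.usage.mode=all;
--   explain select regexp_extract(c2, 'val_([0-9]+)', 1) from varchar_udf_1;
--   -- under mode=all, any UDF without a VectorExpression may use the adaptor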
+select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1; + + +set hive.vectorized.adaptor.usage.mode=chosen; + +explain +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1; + +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1; + +explain +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1; + +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1; + +explain +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1; + +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1; + + +set hive.vectorized.adaptor.usage.mode=none; + +EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF; + +SELECT POWER(key, 2) FROM DECIMAL_UDF; + +EXPLAIN +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10; + +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10; + +set hive.vectorized.adaptor.usage.mode=chosen; + +EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF; + +SELECT POWER(key, 2) FROM DECIMAL_UDF; + +EXPLAIN +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10; + +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10; + + +set hive.vectorized.adaptor.usage.mode=none; + +explain +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; + +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; + +set hive.vectorized.adaptor.usage.mode=chosen; + +explain +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; + +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key; + + +drop table varchar_udf_1; + +DROP TABLE DECIMAL_UDF_txt; +DROP TABLE DECIMAL_UDF; + +drop table count_case_groupby; + diff --git ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out new file mode 100644 index 0000000..bacb3bb --- /dev/null +++ ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out @@ -0,0 +1,933 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +drop table varchar_udf_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- SORT_QUERY_RESULTS + +drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), 
c4 varchar(20)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_udf_1 +PREHOOK: query: insert overwrite table varchar_udf_1 + select key, value, key, value from src where key = '238' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: insert overwrite table varchar_udf_1 + select key, value, key, value from src where key = '238' limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_udf_1 +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DECIMAL_UDF_txt +POSTHOOK: query: CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DECIMAL_UDF_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@decimal_udf_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@decimal_udf_txt +PREHOOK: query: CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DECIMAL_UDF +POSTHOOK: query: CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DECIMAL_UDF +PREHOOK: query: INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf_txt +PREHOOK: Output: default@decimal_udf +POSTHOOK: query: INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf_txt +POSTHOOK: Output: default@decimal_udf +POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ] +POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: drop table if exists count_case_groupby +PREHOOK: type: DROPTABLE 
+POSTHOOK: query: drop table if exists count_case_groupby +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table count_case_groupby (key string, bool boolean) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@count_case_groupby +POSTHOOK: query: create table count_case_groupby (key string, bool boolean) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@count_case_groupby +PREHOOK: query: insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@count_case_groupby +POSTHOOK: query: insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@count_case_groupby +POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: varchar_udf_1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +true true true +PREHOOK: query: explain +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 
'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: varchar_udf_1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +238 238 true +PREHOOK: query: explain +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: varchar_udf_1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: 
query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +replaced_238 replaced_238 true +PREHOOK: query: explain +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: varchar_udf_1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +true true true +PREHOOK: query: explain +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: varchar_udf_1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +238 238 true +PREHOOK: query: explain +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: varchar_udf_1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked 
pattern was here #### +replaced_238 replaced_238 true +PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: power(key, 2) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +0.0 +0.0 +0.0 +0.010000000000000002 +0.04000000000000001 +0.09 +0.09 +0.10890000000000001 +0.10890000000000001 +0.11088900000000002 +0.11088900000000002 +1.0 +1.0 +1.0 +1.0E-4 +1.2544000000000002 +1.2544000000000002 +1.2544000000000002 +1.2588840000000003 +1.2588840000000003 +1.52415787532388352E18 +1.52415787532388352E18 +1.936E7 +100.0 +10000.0 +15376.0 +15675.04 +1576255.1401 +4.0 +4.0 +4.0E-4 +400.0 +40000.0 +9.8596 +9.8596 +9.8596 +9.8596 +NULL +PREHOOK: query: EXPLAIN +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 10) (type: boolean) + Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, 
key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: power(key, 2) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +0.0 +0.0 +0.0 +0.010000000000000002 +0.04000000000000001 +0.09 +0.09 +0.10890000000000001 +0.10890000000000001 +0.11088900000000002 +0.11088900000000002 +1.0 +1.0 +1.0 +1.0E-4 +1.2544000000000002 +1.2544000000000002 +1.2544000000000002 +1.2588840000000003 +1.2588840000000003 +1.52415787532388352E18 +1.52415787532388352E18 +1.936E7 +100.0 +10000.0 +15376.0 +15675.04 +1576255.1401 +4.0 +4.0 +4.0E-4 +400.0 +40000.0 +9.8596 +9.8596 +9.8596 +9.8596 +NULL +PREHOOK: query: EXPLAIN +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 10) (type: boolean) + Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +POSTHOOK: query: SELECT + exp(key), ln(key), + log(key), log(key, key), log(key, value), log(value, key), + log10(key), sqrt(key) +FROM DECIMAL_UDF WHERE key = 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_udf +#### A masked pattern was here #### +22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795 +PREHOOK: query: explain +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: count_case_groupby + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +key1 1 
+key2 1 +key3 0 +key4 1 +key5 0 +PREHOOK: query: explain +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: count_case_groupby + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_case_groupby +#### A masked pattern was here #### +key1 1 +key2 1 +key3 0 +key4 1 +key5 0 +PREHOOK: query: drop table varchar_udf_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_udf_1 +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_udf_1 +POSTHOOK: Output: default@varchar_udf_1 +PREHOOK: query: DROP TABLE DECIMAL_UDF_txt +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_udf_txt +PREHOOK: Output: default@decimal_udf_txt +POSTHOOK: query: DROP TABLE DECIMAL_UDF_txt +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_udf_txt +POSTHOOK: Output: default@decimal_udf_txt +PREHOOK: query: DROP TABLE DECIMAL_UDF +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_udf +PREHOOK: Output: default@decimal_udf +POSTHOOK: query: DROP TABLE DECIMAL_UDF +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_udf +POSTHOOK: Output: default@decimal_udf +PREHOOK: query: drop table count_case_groupby 
+PREHOOK: type: DROPTABLE +PREHOOK: Input: default@count_case_groupby +PREHOOK: Output: default@count_case_groupby +POSTHOOK: query: drop table count_case_groupby +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@count_case_groupby +POSTHOOK: Output: default@count_case_groupby
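// ---------------------------------------------------------------------------
// Editor's note: a hedged sketch (not part of the patch or the expected test
// output above) showing how the reworked VectorizationContext constructors
// thread configuration through, per the Java changes at the top of this diff.
// The demo class name and column names below are illustrative only.
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;

public class VectorizationContextWiringDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setVar(HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE, "chosen");

    List<String> columnNames = Arrays.asList("key", "value");

    // Task-level context: resolves hive.vectorized.adaptor.usage.mode from the
    // conf, as Vectorizer.getVectorizationContext() now does.
    VectorizationContext taskContext =
        new VectorizationContext("__Reduce_Shuffle__", columnNames, conf);

    // Operator-level output context: copies the already-resolved mode from its
    // environment context instead of re-reading the conf, as the operator
    // constructors (e.g. VectorGroupByOperator) now do.
    VectorizationContext outputContext =
        new VectorizationContext("GBY", columnNames, /* vContextEnvironment */ taskContext);

    System.out.println("created " + outputContext);
  }
}
// ---------------------------------------------------------------------------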