diff --git itests/qtest/pom.xml itests/qtest/pom.xml
index 1a19610..919036f 100644
--- itests/qtest/pom.xml
+++ itests/qtest/pom.xml
@@ -38,7 +38,7 @@
false
<minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q</minimr.query.files>
<minimr.query.negative.files>cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q</minimr.query.negative.files>
- <minitez.query.files>tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q</minitez.query.files>
+ <minitez.query.files>tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q,udf_concat_tez.q</minitez.query.files>
<minitez.query.files.shared>cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q</minitez.query.files.shared>
<beeline.positive.exclude>add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q</beeline.positive.exclude>
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index bfdd3ce..e6d10af 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -150,6 +150,10 @@ public Builder setInputExpressionType(int index, InputExpressionType type) {
return this;
}
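+
+ /** Returns the recorded type of the argument at the given index, letting
+ * the vectorizer compare it with a UDF's preferred type before a retry. */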
+ public ArgumentType getArgumentType(int index) {
+ return argTypes[index];
+ }
+
public Descriptor build() {
return new Descriptor(mode, argCount, argTypes, exprTypes);
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 9772b4d..45b8538 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -39,6 +39,7 @@
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
@@ -100,6 +101,7 @@
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
/**
* Context class for vectorization execution.
@@ -484,7 +486,7 @@ private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child,
} else {
// Casts to exact types including long to double etc. are needed in some special cases.
- if (udf instanceof GenericUDFCoalesce) {
+ if (udf == null || udf instanceof GenericUDFCoalesce) {
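+ // A null udf marks a cast requested internally by the vectorizer
+ // (preferred-type retry in getVectorExpressionForUdf); always build it.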
GenericUDF genericUdf = getGenericUDFForCast(castType);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
@@ -784,20 +786,105 @@ private VectorExpression getVectorExpressionForUdf(Class<?> udf, List<ExprNodeDesc> childExpr,
+ TypeInfo[] casters = null;
Class<? extends VectorExpression> vclass = this.vMap.getVectorExpressionClass(udf, descriptor);
if (vclass == null) {
if (LOG.isDebugEnabled()) {
- LOG.debug("No vector udf found for "+udf.getSimpleName() + ", descriptor: "+descriptor);
+ LOG.debug("Failed to vectorizing " + udf.getSimpleName() + " with " + descriptor);
+ }
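+ // First lookup failed; retry once with the UDF's preferred argument
+ // types, recording the implicit casts to apply in 'casters'.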
+ if (numChildren > 0) {
+ casters = getCaster(udf, childExpr, builder);
+ if (casters != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Retry vectorizing " + udf.getSimpleName() + " with " + builder.build());
+ }
+ vclass = this.vMap.getVectorExpressionClass(udf, builder.build());
+ }
+ }
+ if (vclass == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("No vector udf found for " + udf.getSimpleName() + ", descriptor: " + descriptor);
+ }
+ return null;
}
- return null;
}
Mode childrenMode = getChildrenMode(mode, udf);
- return createVectorExpression(vclass, childExpr, childrenMode, returnType);
+ return createVectorExpression(vclass, childExpr, childrenMode, casters, returnType);
+ }
+
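+ /**
+ * For each child whose current descriptor type differs from the UDF's
+ * preferred type, records the TypeInfo to cast it to and updates the
+ * builder in place. Returns null if no usable preferred types exist.
+ */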
+ private TypeInfo[] getCaster(Class<?> udf, List<ExprNodeDesc> childExpr,
+ VectorExpressionDescriptor.Builder builder) {
+ String[] preferred = getPreferredTypes(udf, childExpr.size());
+ if (preferred == null) {
+ return null;
+ }
+ TypeInfo[] casters = new TypeInfo[preferred.length];
+ for (int i = 0; i < preferred.length; i++) {
+ ArgumentType sourceType = builder.getArgumentType(i);
+ ArgumentType targetType = ArgumentType.getType(preferred[i]);
+ if (sourceType != targetType) {
+ try {
+ casters[i] = TypeInfoFactory.getPrimitiveTypeInfo(targetType.name().toLowerCase());
+ builder.setArgumentType(i, targetType);
+ } catch (Exception e) {
+ LOG.info("Invalid type " + targetType.name(), e);
+ return null;
+ }
+ }
+ }
+ return casters;
+ }
+
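+ // Reads the preferredType() spec from the UDF's @VectorizedExpressions
+ // annotation and expands it to one type name per child expression.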
+ private String[] getPreferredTypes(Class<?> udf, int numChildren) {
+ VectorizedExpressions annotation = udf.getAnnotation(VectorizedExpressions.class);
+ if (annotation == null || annotation.preferredType().isEmpty()) {
+ return null;
+ }
+ String[] preferred = toPreferredTypes(numChildren, annotation.preferredType());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Preferred arguments " + udf.getSimpleName() + Arrays.toString(preferred));
+ }
+ return preferred;
+ }
+
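+ /**
+ * Expands a spec such as "INT, STRING..., INT" to exactly numChildren
+ * entries: fixed types are copied through, and the single "..." entry
+ * repeats to fill the remaining middle positions. Returns null when the
+ * spec cannot match numChildren.
+ */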
+ static String[] toPreferredTypes(int numChildren, String description) {
+ String[] spec = splitAndTrim(description);
+ if (spec.length - 1 > numChildren) {
+ return null;
+ }
+ int i = 0;
+ for (; i < spec.length; i++) {
+ if (spec[i].endsWith("...")) {
+ break;
+ }
+ }
+ if (i == spec.length && numChildren != spec.length) {
+ return null;
+ }
+ String[] types = new String[numChildren];
+ System.arraycopy(spec, 0, types, 0, i);
+ if (i != spec.length) {
+ int trailing = spec.length - i - 1;
+ System.arraycopy(spec, i + 1, types, types.length - trailing, trailing);
+ Arrays.fill(types, i, types.length - trailing, spec[i].substring(0, spec[i].length() - 3));
+ }
+ return types;
+ }
+
+ private static String[] splitAndTrim(String description) {
+ String[] split = description.split(",");
+ for (int i = 0; i < split.length; i++) {
+ split[i] = split[i].trim();
+ }
+ return split;
}
private VectorExpression createVectorExpression(Class<? extends VectorExpression> vectorClass,
List<ExprNodeDesc> childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException {
+ return createVectorExpression(vectorClass, childExpr, childrenMode, null, returnType);
+ }
+
+ private VectorExpression createVectorExpression(Class<? extends VectorExpression> vectorClass,
+ List<ExprNodeDesc> childExpr, Mode childrenMode, TypeInfo[] casters, TypeInfo returnType)
+ throws HiveException {
int numChildren = childExpr == null ? 0: childExpr.size();
VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren];
List<VectorExpression> children = new ArrayList<VectorExpression>();
@@ -805,24 +892,27 @@ private VectorExpression createVectorExpression(Class<? extends VectorExpression> vectorClass,
try {
for (int i = 0; i < numChildren; i++) {
ExprNodeDesc child = childExpr.get(i);
- inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName());
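+ // If a preferred type was chosen for this position, wrap the child in
+ // an implicit cast and vectorize the cast expression instead.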
+ if (casters != null && casters[i] != null) {
+ child = getImplicitCastExpression(null, child, casters[i]);
+ }
if (child instanceof ExprNodeGenericFuncDesc) {
VectorExpression vChild = getVectorExpression(child, childrenMode);
- children.add(vChild);
- arguments[i] = vChild.getOutputColumn();
+ children.add(vChild);
+ arguments[i] = vChild.getOutputColumn();
} else if (child instanceof ExprNodeColumnDesc) {
int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
- if (childrenMode == Mode.FILTER) {
- // In filter mode, the column must be a boolean
- children.add(new SelectColumnIsTrue(colIndex));
- }
- arguments[i] = colIndex;
+ if (childrenMode == Mode.FILTER) {
+ // In filter mode, the column must be a boolean
+ children.add(new SelectColumnIsTrue(colIndex));
+ }
+ arguments[i] = colIndex;
} else if (child instanceof ExprNodeConstantDesc) {
Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
arguments[i] = scalarValue;
} else {
- throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
+ throw new HiveException("Cannot handle expression: " + child);
}
+ inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName());
}
VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments);
vectorExpression.setInputTypes(inputTypes);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedExpressions.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedExpressions.java
index e7d1957..a6ec697 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedExpressions.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedExpressions.java
@@ -26,6 +26,8 @@
@Retention(RetentionPolicy.RUNTIME)
public @interface VectorizedExpressions {
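+ // Comma-separated preferred argument types; an entry ending in "..."
+ // repeats to cover a variable number of arguments, e.g. "STRING...".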
+ String preferredType() default "";
+
Class<? extends VectorExpression>[] value();
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
index d4d7e7c..d51f573 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
@@ -27,11 +27,11 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatScalarCol;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
@@ -49,15 +49,17 @@
+ "Example:\n"
+ " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n"
+ " 'abcdef'")
-@VectorizedExpressions({StringConcatColCol.class, StringConcatColScalar.class,
- StringConcatScalarCol.class})
+@VectorizedExpressions(preferredType="STRING...",
+ value={StringConcatColCol.class, StringConcatColScalar.class, StringConcatScalarCol.class})
public class GenericUDFConcat extends GenericUDF {
- private transient ObjectInspector[] argumentOIs;
+ private transient PrimitiveObjectInspector[] argumentOIs;
private transient StringConverter[] stringConverters;
private transient PrimitiveCategory returnType = PrimitiveCategory.STRING;
private transient BytesWritable[] bw;
private transient GenericUDFUtils.StringHelper returnHelper;
+ private transient Object[] constants;
+
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -68,16 +70,17 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
// All CHAR/VARCHAR inputs: return VARCHAR
// All BINARY inputs: return BINARY
// Otherwise return STRING
- argumentOIs = arguments;
-
PrimitiveCategory currentCategory;
PrimitiveObjectInspector poi;
boolean fixedLengthReturnValue = true;
int returnLength = 0; // Only for char/varchar return types
+
+ argumentOIs = new PrimitiveObjectInspector[arguments.length];
for (int idx = 0; idx < arguments.length; ++idx) {
if (arguments[idx].getCategory() != Category.PRIMITIVE) {
throw new UDFArgumentException("CONCAT only takes primitive arguments");
}
+ argumentOIs[idx] = (PrimitiveObjectInspector) arguments[idx];
poi = (PrimitiveObjectInspector)arguments[idx];
currentCategory = poi.getPrimitiveCategory();
if (idx == 0) {
@@ -121,34 +124,59 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
}
}
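+ // Capture the writable values of constant arguments once so evaluate()
+ // does not have to re-convert them for every row.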
+ constants = new Object[arguments.length];
+
+ boolean hasConstant = hasAnyConstants(arguments);
+ if (hasConstant) {
+ for (int i = 0; i < arguments.length; i++) {
+ if (arguments[i] instanceof ConstantObjectInspector) {
+ constants[i] = ((ConstantObjectInspector)arguments[i]).getWritableConstantValue();
+ }
+ }
+ }
+
if (returnType == PrimitiveCategory.BINARY) {
bw = new BytesWritable[arguments.length];
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
- } else {
- // treat all inputs as string, the return value will be converted to the appropriate type.
- createStringConverters();
- returnHelper = new GenericUDFUtils.StringHelper(returnType);
- BaseCharTypeInfo typeInfo;
- switch (returnType) {
- case STRING:
- return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
- case CHAR:
- typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
- return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
- case VARCHAR:
- typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
- return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
- default:
- throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
+ }
+ // treat all inputs as string, the return value will be converted to the appropriate type.
+ stringConverters = createStringConverters(argumentOIs);
+ if (hasConstant) {
+ for (int i = 0; i < constants.length; i++) {
+ constants[i] = constants[i] == null ? null : stringConverters[i].convert(constants[i]);
}
}
+ returnHelper = new GenericUDFUtils.StringHelper(returnType);
+ BaseCharTypeInfo typeInfo;
+ switch (returnType) {
+ case STRING:
+ return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ case CHAR:
+ typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+ case VARCHAR:
+ typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+ default:
+ throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
+ }
}
- private void createStringConverters() {
- stringConverters = new StringConverter[argumentOIs.length];
+ private boolean hasAnyConstants(ObjectInspector[] argumentOIs) {
+ for (ObjectInspector argumentOI : argumentOIs) {
+ if (argumentOI instanceof ConstantObjectInspector) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private StringConverter[] createStringConverters(ObjectInspector[] argumentOIs) {
+ StringConverter[] stringConverters = new StringConverter[argumentOIs.length];
for (int idx = 0; idx < argumentOIs.length; ++idx) {
stringConverters[idx] = new StringConverter((PrimitiveObjectInspector) argumentOIs[idx]);
}
+ return stringConverters;
}
@Override
@@ -160,11 +188,11 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
}
}
- public Object binaryEvaluate(DeferredObject[] arguments) throws HiveException {
+ private Object binaryEvaluate(DeferredObject[] arguments) throws HiveException {
int len = 0;
for (int idx = 0; idx < arguments.length; ++idx) {
- bw[idx] = ((BinaryObjectInspector)argumentOIs[idx])
- .getPrimitiveWritableObject(arguments[idx].get());
+ bw[idx] = (BytesWritable) (constants[idx] != null ? constants[idx] :
+ (argumentOIs[idx]).getPrimitiveWritableObject(arguments[idx].get()));
if (bw[idx] == null){
return null;
}
@@ -181,19 +209,19 @@ public Object binaryEvaluate(DeferredObject[] arguments) throws HiveException {
return new BytesWritable(out);
}
- public String stringEvaluate(DeferredObject[] arguments) throws HiveException {
- StringBuilder sb = new StringBuilder();
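+ // Reused across rows to avoid allocating a new StringBuilder per call.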
+ private transient StringBuilder builder = new StringBuilder();
+
+ private String stringEvaluate(DeferredObject[] arguments) throws HiveException {
+ builder.setLength(0);
for (int idx = 0; idx < arguments.length; ++idx) {
- String val = null;
- if (arguments[idx] != null) {
- val = (String) stringConverters[idx].convert(arguments[idx].get());
- }
+ String val = (String)(constants[idx] != null ? constants[idx] :
+ stringConverters[idx].convert(arguments[idx].get()));
if (val == null) {
return null;
}
- sb.append(val);
+ builder.append(val);
}
- return sb.toString();
+ return builder.toString();
}
@Override
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 2329f52..42634d0 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -19,10 +19,12 @@
package org.apache.hadoop.hive.ql.exec.vector;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -1209,6 +1211,20 @@ public void testFoldConstantsForUnaryExpression() throws HiveException {
assertTrue(constFoldNodeDesc instanceof ExprNodeConstantDesc);
assertTrue(((HiveDecimal)
(((ExprNodeConstantDesc)constFoldNodeDesc).getValue())).toString().equals("1"));
- }
+ }
+ @Test
+ public void testPreferredTypes() {
+ assertEquals("[STRING, STRING, STRING]", Arrays.toString(VectorizationContext.toPreferredTypes(3, "STRING...")));
+ assertEquals("[INT, STRING, STRING]", Arrays.toString(VectorizationContext.toPreferredTypes(3, "INT, STRING...")));
+ assertEquals("[INT, STRING, INT]", Arrays.toString(VectorizationContext.toPreferredTypes(3, "INT, STRING..., INT")));
+
+ assertEquals("[STRING]", Arrays.toString(VectorizationContext.toPreferredTypes(1, "STRING...")));
+ assertEquals("[INT]", Arrays.toString(VectorizationContext.toPreferredTypes(1, "INT, STRING...")));
+ assertEquals("[INT, INT]", Arrays.toString(VectorizationContext.toPreferredTypes(2, "INT, STRING..., INT")));
+
+ assertNull(VectorizationContext.toPreferredTypes(2, "INT, INT, STRING..., INT"));
+ assertNull(VectorizationContext.toPreferredTypes(2, "INT, INT, INT"));
+ assertNull(VectorizationContext.toPreferredTypes(2, "INT"));
+ }
}
diff --git ql/src/test/queries/clientpositive/udf_concat_tez.q ql/src/test/queries/clientpositive/udf_concat_tez.q
new file mode 100644
index 0000000..bad85de
--- /dev/null
+++ ql/src/test/queries/clientpositive/udf_concat_tez.q
@@ -0,0 +1,8 @@
+set hive.vectorized.execution.enabled=true;
+
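+-- l_orderkey is bigint, so vectorizing concat() relies on the implicit
+-- cast to string driven by the UDF's STRING... preferred type.
+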
+create table lineitem (l_orderkey bigint, l_linestatus string) STORED AS ORC;
+insert overwrite table lineitem select * from src tablesample (10 rows);
+
+explain
+SELECT concat(l_orderkey, l_linestatus) FROM lineitem;
+SELECT concat(l_orderkey, l_linestatus) FROM lineitem;
diff --git ql/src/test/results/clientpositive/tez/udf_concat_tez.q.out ql/src/test/results/clientpositive/tez/udf_concat_tez.q.out
new file mode 100644
index 0000000..4c0f8c5
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/udf_concat_tez.q.out
@@ -0,0 +1,74 @@
+PREHOOK: query: create table lineitem (l_orderkey bigint, l_linestatus string) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table lineitem (l_orderkey bigint, l_linestatus string) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: insert overwrite table lineitem select * from src tablesample (10 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: insert overwrite table lineitem select * from src tablesample (10 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@lineitem
+POSTHOOK: Lineage: lineitem.l_linestatus SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: lineitem.l_orderkey EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: explain
+SELECT concat(l_orderkey, l_linestatus) FROM lineitem
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT concat(l_orderkey, l_linestatus) FROM lineitem
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 10 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: concat(l_orderkey, l_linestatus) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT concat(l_orderkey, l_linestatus) FROM lineitem
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT concat(l_orderkey, l_linestatus) FROM lineitem
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+238val_238
+86val_86
+311val_311
+27val_27
+165val_165
+409val_409
+255val_255
+278val_278
+98val_98
+484val_484