From 8be18d4a78ac7c3af15765359bc09368840bacea Mon Sep 17 00:00:00 2001
From: Tim Armstrong <tarmstrong@cloudera.com>
Date: Fri, 14 Aug 2020 09:45:07 -0700
Subject: [PATCH] WIP: IMPALA-1652: fix char semantics

Change-Id: I00980c162257ce85e7ec2f4f03c48b142e5ef2e2
---

diff --git a/be/src/exprs/anyval-util.cc b/be/src/exprs/anyval-util.cc
index 8f4f927..e9d8c23 100644
--- a/be/src/exprs/anyval-util.cc
+++ b/be/src/exprs/anyval-util.cc
@@ -75,7 +75,7 @@
       out.type = FunctionContext::TYPE_STRING;
       break;
     case TYPE_CHAR:
-      out.type = FunctionContext::TYPE_FIXED_BUFFER;
+      out.type = FunctionContext::TYPE_CHAR;
       out.len = type.len;
       break;
     case TYPE_FIXED_UDA_INTERMEDIATE:
@@ -125,7 +125,7 @@
       return ColumnType(TYPE_STRING);
     case FunctionContext::TYPE_DECIMAL:
       return ColumnType::CreateDecimalType(type.precision, type.scale);
-    case FunctionContext::TYPE_FIXED_BUFFER:
+    case FunctionContext::TYPE_CHAR:
       return ColumnType::CreateCharType(type.len);
     case FunctionContext::TYPE_FIXED_UDA_INTERMEDIATE:
       return ColumnType::CreateFixedUdaIntermediateType(type.len);
diff --git a/be/src/exprs/cast-functions-ir.cc b/be/src/exprs/cast-functions-ir.cc
index 84a899b..b53780d 100644
--- a/be/src/exprs/cast-functions-ir.cc
+++ b/be/src/exprs/cast-functions-ir.cc
@@ -211,6 +211,11 @@
   StringVal sv;
   sv.ptr = val.ptr;
   sv.len = val.len;
+  if (ctx->GetArgType(0)->type == FunctionContext::TYPE_CHAR) {
+    // IMPALA-1652: trim trailing spaces when converting with CHAR.
+    while (sv.len > 0 && sv.ptr[sv.len - 1] == ' ') --sv.len;
+  }
+  // TODO: codegen this with GetConstFnAttr()
   AnyValUtil::TruncateIfNecessary(ctx->GetReturnType(), &sv);
   return sv;
 }
@@ -219,7 +224,7 @@
   if (val.is_null) return StringVal::null();
 
   const FunctionContext::TypeDesc& type = ctx->GetReturnType();
-  DCHECK(type.type == FunctionContext::TYPE_FIXED_BUFFER);
+  DCHECK(type.type == FunctionContext::TYPE_CHAR);
   DCHECK_GE(type.len, 1);
   char* cptr;
   if (type.len > val.len) {
diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc
index 55fe239..9bc93d2 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -201,7 +201,7 @@
 IntVal StringFunctions::CharLength(FunctionContext* context, const StringVal& str) {
   if (str.is_null) return IntVal::null();
   const FunctionContext::TypeDesc* t = context->GetArgType(0);
-  DCHECK_EQ(t->type, FunctionContext::TYPE_FIXED_BUFFER);
+  DCHECK_EQ(t->type, FunctionContext::TYPE_CHAR);
   return StringValue::UnpaddedCharLength(reinterpret_cast<char*>(str.ptr), t->len);
 }
 
diff --git a/be/src/testutil/test-udas.cc b/be/src/testutil/test-udas.cc
index dfed46e..5b9f22e 100644
--- a/be/src/testutil/test-udas.cc
+++ b/be/src/testutil/test-udas.cc
@@ -371,7 +371,7 @@
 static void ValidateCharIntermediateFunctionContext(const FunctionContext* context) {
   assert(context->GetNumArgs() == 1);
   assert(context->GetArgType(0)->type == FunctionContext::TYPE_INT);
-  assert(context->GetIntermediateType().type == FunctionContext::TYPE_FIXED_BUFFER);
+  assert(context->GetIntermediateType().type == FunctionContext::TYPE_CHAR);
   assert(context->GetIntermediateType().len == 10);
   assert(context->GetReturnType().type == FunctionContext::TYPE_INT);
 }
diff --git a/be/src/udf/uda-test-harness.h b/be/src/udf/uda-test-harness.h
index 2cddbe9..b74f25d 100644
--- a/be/src/udf/uda-test-harness.h
+++ b/be/src/udf/uda-test-harness.h
@@ -62,7 +62,7 @@
     result_comparator_fn_ = fn;
   }
 
-  /// This must be called if the INTERMEDIATE is TYPE_FIXED_BUFFER
+  /// This must be called if the INTERMEDIATE is TYPE_CHAR
   void SetIntermediateSize(int byte_size) {
     fixed_buffer_byte_size_ = byte_size;
   }
@@ -134,7 +134,7 @@
   /// Set during Execute() by subclass
   int num_input_values_;
 
-  /// Buffer len for intermediate results if the type is TYPE_FIXED_BUFFER
+  /// Buffer len for intermediate results if the type is TYPE_CHAR
   int fixed_buffer_byte_size_;
 
   /// Error message if anything went wrong during the execution.
diff --git a/be/src/udf/udf.cc b/be/src/udf/udf.cc
index 7865a90..a3e8e04 100644
--- a/be/src/udf/udf.cc
+++ b/be/src/udf/udf.cc
@@ -561,6 +561,7 @@
   return GetConstFnAttr(state_->decimal_v2(), return_type_, arg_types_, t, i);
 }
 
+// TODO: move this to header to reduce overhead?
 int FunctionContextImpl::GetConstFnAttr(bool uses_decimal_v2,
     const FunctionContext::TypeDesc& return_type,
     const vector<FunctionContext::TypeDesc>& arg_types, ConstFnAttr t, int i) {
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 2358672..a6e73e1 100644
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -85,8 +85,8 @@
     TYPE_TIMESTAMP,
     TYPE_STRING,
     TYPE_DATE,
-    // Not used - maps to CHAR(N), which is not supported for UDFs and UDAs.
-    TYPE_FIXED_BUFFER,
+    // CHAR(N), which is not supported for UDFs and UDAs.
+    TYPE_CHAR,
     TYPE_DECIMAL,
     TYPE_VARCHAR,
     // A fixed-size buffer, passed as a StringVal.
@@ -100,7 +100,7 @@
     int precision;
     int scale;
 
-    /// Only valid if type is one of TYPE_FIXED_BUFFER, TYPE_FIXED_UDA_INTERMEDIATE or
+    /// Only valid if type is one of TYPE_CHAR, TYPE_FIXED_UDA_INTERMEDIATE or
     /// TYPE_VARCHAR.
     int len;
   };
