diff --git a/be/src/exprs/string-functions.cc b/be/src/exprs/string-functions.cc
index 9bd83e5..faf89d9 100644
--- a/be/src/exprs/string-functions.cc
+++ b/be/src/exprs/string-functions.cc
@@ -15,6 +15,8 @@
 #include "exprs/string-functions.h"
 
 #include <boost/regex.hpp>
+#include <boost/foreach.hpp>
+#include <boost/unordered_map.hpp>
 
 #include "exprs/expr.h"
 #include "exprs/function-call.h"
@@ -570,4 +572,97 @@ void* StringFunctions::ParseUrlKey(Expr* e, TupleRow* row) {
   return &e->result_.string_val;
 }
 
+// Returns true if the whole of 'delim' occurs in 'str' starting at byte offset 'pos'.
+// Never reads past the end of 'str'. 'pos' must be in [0, str->len].
+static bool DelimiterAt(const StringValue* str, int32_t pos, const StringValue* delim) {
+  if (delim->len > str->len - pos) return false;
+  for (int32_t i = 0; i < delim->len; ++i) {
+    if (str->ptr[pos + i] != delim->ptr[i]) return false;
+  }
+  return true;
+}
+
+// Implements str_to_map(str) and str_to_map(str, delim1, delim2).
+// 'str' is split into entries on 'delim1' (default ","), and every entry is split into
+// a key and a value at its first 'delim2' (default ":"). An entry without 'delim2' is a
+// key with an empty value, entries with an empty key are dropped, and a repeated key
+// keeps the value of its last occurrence.
+// The result is a string of the form {"k1":"v1","k2":"v2"}. Keys and values are copied
+// verbatim (no escaping) and the entry order is the iteration order of the hash map.
+// Returns NULL if 'str' or a delimiter is NULL, or if a delimiter is empty.
+void* StringFunctions::StrToMap(Expr* e, TupleRow* row) {
+  DCHECK(e->GetNumChildren() == 1 || e->GetNumChildren() == 3);
+  StringValue* str = reinterpret_cast<StringValue*>(e->children()[0]->GetValue(row));
+  // This check has to precede every dereference of 'str'.
+  if (str == NULL) return NULL;
+
+  static StringValue default_delim1(const_cast<char*>(","), 1);
+  static StringValue default_delim2(const_cast<char*>(":"), 1);
+  StringValue* delim1 = &default_delim1;
+  StringValue* delim2 = &default_delim2;
+  if (e->GetNumChildren() == 3) {
+    delim1 = reinterpret_cast<StringValue*>(e->children()[1]->GetValue(row));
+    delim2 = reinterpret_cast<StringValue*>(e->children()[2]->GetValue(row));
+    // A NULL delimiter cannot be dereferenced and an empty one would match without
+    // consuming any input, so both produce a NULL result.
+    if (delim1 == NULL || delim2 == NULL) return NULL;
+    if (delim1->len <= 0 || delim2->len <= 0) return NULL;
+  }
+
+  typedef unordered_map<string, string> KvMap;
+  KvMap kvmap;
+  // Key of the entry being parsed; only set once 'delim2' was seen in that entry.
+  const char* key = NULL;
+  int32_t key_len = 0;
+  // True while scanning the value part of an entry. Further occurrences of 'delim2'
+  // inside a value are ordinary characters.
+  bool after_delim2 = false;
+  // Start of the token (key or value) that is currently being scanned.
+  int32_t token_start = 0;
+  int32_t idx = 0;
+  while (true) {
+    // The end of the string terminates the last entry just like 'delim1' does.
+    const bool at_end = idx >= str->len;
+    if (at_end || DelimiterAt(str, idx, delim1)) {
+      const int32_t token_len = idx - token_start;
+      if (after_delim2) {
+        if (key_len > 0) {
+          kvmap[string(key, key_len)] = string(str->ptr + token_start, token_len);
+        }
+      } else if (token_len > 0) {
+        // No 'delim2' in this entry: the whole token is a key without a value.
+        kvmap[string(str->ptr + token_start, token_len)] = "";
+      }
+      if (at_end) break;
+      after_delim2 = false;
+      key_len = 0;
+      idx += delim1->len;
+      token_start = idx;
+    } else if (!after_delim2 && DelimiterAt(str, idx, delim2)) {
+      key = str->ptr + token_start;
+      key_len = idx - token_start;
+      after_delim2 = true;
+      idx += delim2->len;
+      token_start = idx;
+    } else {
+      ++idx;
+    }
+  }
+
+  string result;
+  // Reserve up front to reduce re-allocations while appending.
+  result.reserve(static_cast<size_t>(str->len) * 2);
+  result.append("{");
+  bool first = true;
+  BOOST_FOREACH(const KvMap::value_type& entry, kvmap) {
+    if (!first) result.append(",");
+    first = false;
+    result.append("\"").append(entry.first).append("\":\"");
+    result.append(entry.second).append("\"");
+  }
+  result.append("}");
+  e->result_.SetStringVal(result);
+  return &e->result_.string_val;
+}
+
 }
diff --git a/be/src/exprs/string-functions.h b/be/src/exprs/string-functions.h
index 8621a6c..d3ecba0 100644
--- a/be/src/exprs/string-functions.h
+++ b/be/src/exprs/string-functions.h
@@ -52,6 +52,7 @@ class StringFunctions {
   static void* FindInSet(Expr* e, TupleRow* row);
   static void* ParseUrl(Expr* e, TupleRow* row);
   static void* ParseUrlKey(Expr* e, TupleRow* row);
+  static void* StrToMap(Expr* e, TupleRow* row);
 };
 
 }
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 537a960..b20acfe 100755
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -121,6 +121,10 @@ functions = [
   ['String_Parse_Url', 'STRING', ['STRING', 'STRING', 'STRING'], \
         'StringFunctions::ParseUrlKey', ['parse_url']],
   ['Utility_Version', 'STRING', [], 'UtilityFunctions::Version', ['version']],
+  ['String_Str_To_Map', 'STRING', ['STRING', 'STRING', 'STRING'], \
+        'StringFunctions::StrToMap', ['str_to_map']],
+  ['String_Str_To_Map', 'STRING', ['STRING'], \
+        'StringFunctions::StrToMap', ['str_to_map']],
 
   # Timestamp Functions
   ['Unix_Timestamp', 'INT', [], \