Index: vm/include/open/rt_helpers.h =================================================================== --- vm/include/open/rt_helpers.h (revision 637959) +++ vm/include/open/rt_helpers.h (working copy) @@ -594,31 +594,7 @@ // Non-VM specific helpers for the JIT ///// - VM_RT_F2I=900, - VM_RT_F2L=901, - VM_RT_D2I=902, - VM_RT_D2L=903, - // Conversions - - /** - * @param The parameters are the following: - * (new version) - * arg\ Long value to be shifted - * arg\ Int value with count - * (old version) - * arg\ (edx:eax) A long value to be shifted - * arg\ (ecx) Count - * @return (edx:eax) The shifted value - */ - - - - VM_RT_FREM=940, - VM_RT_DREM=942, - // Float/double arithmetic - - VM_RT_CHAR_ARRAYCOPY_NO_EXC=950, /** * @param The parameters are the following: * arg\ Object reference for the source array. Must be non-null and refer to an array Index: vm/vmcore/src/jit/rt_helper_info.cpp =================================================================== --- vm/vmcore/src/jit/rt_helper_info.cpp (revision 637959) +++ vm/vmcore/src/jit/rt_helper_info.cpp (working copy) @@ -194,30 +194,6 @@ {VM_RT_INITIALIZE_CLASS_WITHRESOLVE, "VM_RT_INITIALIZE_CLASS_WITHRESOLVE", INTERRUPTIBLE_ALWAYS, CALLING_CONVENTION_STDCALL, 2, NULL, NULL, NULL, NULL}, - - - {VM_RT_F2I, "VM_RT_F2I", - INTERRUPTIBLE_NEVER, CALLING_CONVENTION_STDCALL, 1, - NULL, NULL, NULL, NULL}, - {VM_RT_F2L, "VM_RT_F2L", - INTERRUPTIBLE_NEVER, CALLING_CONVENTION_STDCALL, 1, - NULL, NULL, NULL, NULL}, - {VM_RT_D2I, "VM_RT_D2I", - INTERRUPTIBLE_NEVER, CALLING_CONVENTION_STDCALL, 1, - NULL, NULL, NULL, NULL}, - {VM_RT_D2L, "VM_RT_D2L", - INTERRUPTIBLE_NEVER, CALLING_CONVENTION_STDCALL, 1, - NULL, NULL, NULL, NULL}, - {VM_RT_FREM, "VM_RT_FREM", - INTERRUPTIBLE_NEVER, CALLING_CONVENTION_STDCALL, 2, - NULL, NULL, NULL, NULL}, - {VM_RT_DREM, "VM_RT_DREM", - INTERRUPTIBLE_NEVER, CALLING_CONVENTION_STDCALL, 2, - NULL, NULL, NULL, NULL}, - - {VM_RT_CHAR_ARRAYCOPY_NO_EXC, "VM_RT_CHAR_ARRAYCOPY_NO_EXC", - 
INTERRUPTIBLE_ALWAYS, CALLING_CONVENTION_STDCALL, 5, - NULL, NULL, NULL, NULL}, }; static JIT_RT_Function_Entry *jit_rt_function_entries = &(_jit_rt_function_entries_base[0]); Index: vm/vmcore/src/jit/jit_runtime_support.cpp =================================================================== --- vm/vmcore/src/jit/jit_runtime_support.cpp (revision 637959) +++ vm/vmcore/src/jit/jit_runtime_support.cpp (working copy) @@ -795,300 +795,6 @@ return addr; } -/////////////////////////////////////////////////////////// -// Copy Array - -static int32 f2i(float f) -{ -#ifdef PLATFORM_POSIX - if (isnan(f)) -#else - if (_isnan(f)) -#endif - return 0; - if (f>(double)2147483647) - return 2147483647; // maxint - if (f<(double)(int32)0x80000000) - return (int32)0x80000000; // minint - return (int32)f; -} - -static NativeCodePtr rth_get_lil_f2i(int* dyn_count) -{ - static NativeCodePtr addr = NULL; - - if (!addr) { - int32 (*p_f2i)(float) = f2i; - LilCodeStub* cs = lil_parse_code_stub("entry 0:stdcall:f4:g4;"); - assert(cs); - if (dyn_count) { - cs = lil_parse_onto_end(cs, "inc [%0i:pint];", dyn_count); - assert(cs); - } - cs = lil_parse_onto_end(cs, - "in2out platform:g4;" - "call %0i;" - "ret;", - p_f2i); - assert(cs && lil_is_valid(cs)); - addr = LilCodeGenerator::get_platform()->compile(cs); - - DUMP_STUB(addr, "rth_f2i", lil_cs_get_code_size(cs)); - - lil_free_code_stub(cs); - } - - return addr; -} - -#if defined (__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( push ) -#pragma warning (disable:4146)// disable warning 4146: unary minus operator applied to unsigned type, result still unsigned -#endif -static int64 f2l(float f) -{ -#ifdef PLATFORM_POSIX - if (isnan(f)) -#else - if (_isnan(f)) -#endif - return 0; - - if (f >= (double)(__INT64_C(0x7fffffffffffffff))) { - return __INT64_C(0x7fffffffffffffff); // maxint - } else if (f < -(double)__INT64_C(0x8000000000000000)) { - return -__INT64_C(0x8000000000000000); // minint - } - - return (int64)f; -} -#if defined 
(__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( pop ) -#endif - -static NativeCodePtr rth_get_lil_f2l(int* dyn_count) -{ - static NativeCodePtr addr = NULL; - - if (!addr) { - int64 (*p_f2l)(float) = f2l; - LilCodeStub* cs = lil_parse_code_stub("entry 0:stdcall:f4:g8;"); - assert(cs); - if (dyn_count) { - cs = lil_parse_onto_end(cs, "inc [%0i:pint];", dyn_count); - assert(cs); - } - cs = lil_parse_onto_end(cs, - "in2out platform:g8;" - "call %0i;" - "ret;", - p_f2l); - assert(cs && lil_is_valid(cs)); - addr = LilCodeGenerator::get_platform()->compile(cs); - - DUMP_STUB(addr, "rth_f2l", lil_cs_get_code_size(cs)); - - lil_free_code_stub(cs); - } - - return addr; -} - -static int32 d2i(double f) -{ -#ifdef PLATFORM_POSIX - if (isnan(f)) -#else - if (_isnan(f)) -#endif - return 0; - - if (f>(double)2147483647) - return 2147483647; // maxint - if (f<(double)(int32)0x80000000) - return (int32)0x80000000; // minint - - return (int32)f; -} - -static NativeCodePtr rth_get_lil_d2i(int* dyn_count) -{ - static NativeCodePtr addr = NULL; - - if (!addr) { - int32 (*p_d2i)(double) = d2i; - LilCodeStub* cs = lil_parse_code_stub("entry 0:stdcall:f8:g4;"); - assert(cs); - if (dyn_count) { - cs = lil_parse_onto_end(cs, "inc [%0i:pint];", dyn_count); - assert(cs); - } - cs = lil_parse_onto_end(cs, - "in2out platform:g4;" - "call %0i;" - "ret;", - p_d2i); - assert(cs && lil_is_valid(cs)); - addr = LilCodeGenerator::get_platform()->compile(cs); - - DUMP_STUB(addr, "rth_d2i", lil_cs_get_code_size(cs)); - - lil_free_code_stub(cs); - } - - return addr; -} - -#if defined (__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( push ) -#pragma warning (disable:4146)// disable warning 4146: unary minus operator applied to unsigned type, result still unsigned -#endif -static int64 d2l(double f) -{ -#ifdef PLATFORM_POSIX - if (isnan(f)) -#else - if (_isnan(f)) -#endif - return 0; - - if(f >= (double)(__INT64_C(0x7fffffffffffffff))) { - return __INT64_C(0x7fffffffffffffff); // 
maxint - } else if(f < -(double)__INT64_C(0x8000000000000000)) { - return -__INT64_C(0x8000000000000000); // minint - } - - return (int64)f; -} -#if defined (__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( pop ) -#endif - -static NativeCodePtr rth_get_lil_d2l(int* dyn_count) -{ - static NativeCodePtr addr = NULL; - - if (!addr) { - int64 (*p_d2l)(double) = d2l; - LilCodeStub* cs = lil_parse_code_stub("entry 0:stdcall:f8:g8;"); - assert(cs); - if (dyn_count) { - cs = lil_parse_onto_end(cs, "inc [%0i:pint];", dyn_count); - assert(cs); - } - cs = lil_parse_onto_end(cs, - "in2out platform:g8;" - "call %0i;" - "ret;", - p_d2l); - assert(cs && lil_is_valid(cs)); - addr = LilCodeGenerator::get_platform()->compile(cs); - - DUMP_STUB(addr, "rth_d2l", lil_cs_get_code_size(cs)); - - lil_free_code_stub(cs); - } - - return addr; -} - -const int nan_data = 0xffc00000; -#define NANF (*(float*)&nan_data) - -inline static bool is_finite_f(float f) -{ -#ifdef PLATFORM_NT - return _finite(f); -#else - return finite(f); -#endif // PLATFORM_NT -} - -bool is_infinite_f(float f) -{ -#ifdef PLATFORM_NT - return (! _finite(f)) && (! 
_isnan(f)); -#else - return isinf(f); -#endif // PLATFORM_NT -} - -static float frem(float m, float n) -{ - if ( is_finite_f(m) ) { - if ( is_infinite_f(n) ) { - return m; - } - if ( (n > 0 || n < 0) ) { - return (float)fmod(m, n); - } - } - - return NANF; -} - -static NativeCodePtr rth_get_lil_frem(int* dyn_count) -{ - static NativeCodePtr addr = NULL; - - if (!addr) { - float (*p_frem)(float, float) = frem; - LilCodeStub* cs = lil_parse_code_stub("entry 0:stdcall:f4,f4:f4;"); - assert(cs); - if (dyn_count) { - cs = lil_parse_onto_end(cs, "inc [%0i:pint];", dyn_count); - assert(cs); - } - cs = lil_parse_onto_end(cs, - "in2out platform:f4;" - "call %0i;" - "ret;", - p_frem); - assert(cs && lil_is_valid(cs)); - addr = LilCodeGenerator::get_platform()->compile(cs); - - DUMP_STUB(addr, "rth_frem", lil_cs_get_code_size(cs)); - - lil_free_code_stub(cs); - } - - return addr; -} - -static double my_drem(double m, double n) -{ - return fmod(m, n); -} - -static NativeCodePtr rth_get_lil_drem(int* dyn_count) -{ - static NativeCodePtr addr = NULL; - - if (!addr) { - double (*p_drem)(double, double) = my_drem; - LilCodeStub* cs = lil_parse_code_stub("entry 0:stdcall:f8,f8:f8;"); - assert(cs); - if (dyn_count) { - cs = lil_parse_onto_end(cs, "inc [%0i:pint];", dyn_count); - assert(cs); - } - cs = lil_parse_onto_end(cs, - "in2out platform:f8;" - "call %0i;" - "ret;", - p_drem); - assert(cs && lil_is_valid(cs)); - addr = LilCodeGenerator::get_platform()->compile(cs); - - DUMP_STUB(addr, "rth_drem", lil_cs_get_code_size(cs)); - - lil_free_code_stub(cs); - } - - return addr; -} - - ////////////////////////////////////////////////////////////////////////// // Get LIL version of Runtime Helper @@ -2006,18 +1712,6 @@ case VM_RT_JVMTI_FIELD_MODIFICATION_CALLBACK: return rth_get_lil_jvmti_field_modification_callback(dyn_count); // Non-VM - case VM_RT_F2I: - return rth_get_lil_f2i(dyn_count); - case VM_RT_F2L: - return rth_get_lil_f2l(dyn_count); - case VM_RT_D2I: - return 
rth_get_lil_d2i(dyn_count); - case VM_RT_D2L: - return rth_get_lil_d2l(dyn_count); - case VM_RT_FREM: - return rth_get_lil_frem(dyn_count); - case VM_RT_DREM: - return rth_get_lil_drem(dyn_count); case VM_RT_NEWOBJ_WITHRESOLVE: return rth_get_lil_newobj_withresolve(dyn_count); case VM_RT_NEWARRAY_WITHRESOLVE: Index: vm/vmcore/src/util/ia32/base/jit_generic_rt_support_ia32.cpp =================================================================== --- vm/vmcore/src/util/ia32/base/jit_generic_rt_support_ia32.cpp (revision 637959) +++ vm/vmcore/src/util/ia32/base/jit_generic_rt_support_ia32.cpp (working copy) @@ -48,568 +48,3 @@ #include "dump.h" #include "vm_stats.h" - - -static int32 d2i_infinite(double d) -{ -#ifdef __INTEL_COMPILER -#pragma warning(disable: 4146) -#endif - if(_isnan(d)) { - return 0; - } else if(d > (double)2147483647) { - return 2147483647; // maxint - } else if(d < (double)(-2147483647-1)) { - return (-2147483647-1); // minint - } else { - ABORT("The above should exhaust all possibilities"); - return 0; - } -#ifdef __INTEL_COMPILER -#pragma warning(default: 4146) -#endif -} - -static short fpstatus = 0x0e7f; -void *getaddress__vm_d2i() -{ - static void *addr = 0; - if (addr) { - return addr; - } - - const int stub_size = 55; - char *stub = (char *)malloc_fixed_code_for_jit(stub_size, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate); -#ifdef _DEBUG - memset(stub, 0xcc /*int 3*/, stub_size); -#endif - char *ss = stub; - - ss = fld(ss, M_Base_Opnd(esp_reg, 4), 1); - - ss = fnstcw(ss, M_Base_Opnd(esp_reg, -8) ); - ss = fldcw(ss, M_Opnd((unsigned)&fpstatus)); - - - ss = fist(ss, M_Base_Opnd(esp_reg, -4), false, true); - ss = fldcw(ss, M_Base_Opnd(esp_reg, -8) ); - ss = mov(ss, eax_opnd, M_Base_Opnd(esp_reg, -4) ); - ss = alu(ss, cmp_opc, eax_opnd, Imm_Opnd(0x80000000) ); - ss = branch8(ss, Condition_Z, Imm_Opnd(size_8, 0)); - char *backpatch_address__infinite = ((char *)ss) - 1; - ss = ret(ss, Imm_Opnd(8)); - - signed offset = 
(signed)ss-(signed)backpatch_address__infinite - 1; - *backpatch_address__infinite = (char)offset; - ss = push(ss, M_Base_Opnd(esp_reg, 8)); - ss = push(ss, M_Base_Opnd(esp_reg, 8)); - ss = call(ss, (char *)d2i_infinite); - ss = alu(ss, add_opc, esp_opnd, Imm_Opnd(8)); - ss = ret(ss, Imm_Opnd(8)); - - addr = stub; - assert((ss - stub) <= stub_size); - - compile_add_dynamic_generated_code_chunk("vm_d2i", false, stub, stub_size); - - if (jvmti_should_report_event(JVMTI_EVENT_DYNAMIC_CODE_GENERATED)) { - jvmti_send_dynamic_code_generated_event("vm_d2i", stub, stub_size); - } - - DUMP_STUB(stub, "getaddress__vm_d2i", ss - stub); - - return addr; -} //getaddress__vm_d2i - -void *getaddress__vm_d2l() -{ - static void *addr = 0; - if (addr) { - return addr; - } - - const int stub_size = 45; - char *stub = (char *)malloc_fixed_code_for_jit(stub_size, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate); -#ifdef _DEBUG - memset(stub, 0xcc /*int 3*/, stub_size); -#endif - char *ss = stub; - - ss = push(ss, ebp_opnd); - ss = mov(ss, ebp_opnd, esp_opnd); - ss = push(ss, ebx_opnd); - ss = push(ss, esi_opnd); - ss = push(ss, edi_opnd); - - - ss = fld(ss, M_Base_Opnd(ebp_reg, +0x14), 1); - ss = wait(ss); - ss = fnstcw(ss, M_Base_Opnd(ebp_reg, +8) ); - ss = mov(ss, eax_opnd, M_Base_Opnd(ebp_reg, +8), size_16); - ss = alu(ss, or_opc, eax_opnd, Imm_Opnd(size_16, 0xc7f), size_16); - ss = mov(ss, M_Base_Opnd(ebp_reg, +0x0c), eax_opnd, size_16); - - ss = fldcw(ss, M_Base_Opnd(ebp_reg, +0x0c) ); - ss = fist(ss, M_Base_Opnd(ebp_reg, +0x0c), true, true); - ss = fldcw(ss, M_Base_Opnd(ebp_reg, +8) ); - - ss = mov(ss, eax_opnd, M_Base_Opnd(ebp_reg, +0x0c) ); - ss = mov(ss, edx_opnd, M_Base_Opnd(ebp_reg, +0x10) ); - - ss = pop(ss, edi_opnd); - ss = pop(ss, esi_opnd); - ss = pop(ss, ebx_opnd); - ss = pop(ss, ebp_opnd); - - ss = ret(ss); - - addr = stub; - assert((ss - stub) <= stub_size); - - compile_add_dynamic_generated_code_chunk("vm_d2l", false, stub, stub_size); - - if 
(jvmti_should_report_event(JVMTI_EVENT_DYNAMIC_CODE_GENERATED)) { - jvmti_send_dynamic_code_generated_event("vm_d2l", stub, stub_size); - } - - DUMP_STUB(stub, "getaddress__vm_d2l", ss - stub); - - return addr; -} //getaddress__vm_d2l - - -static int64 __stdcall vm_d2l(double d); - -static int64 __stdcall vm_d2l(double d) -{ - assert(!hythread_is_suspend_enabled()); - -#ifdef VM_STATS - VM_Statistics::get_vm_stats().num_d2l++; -#endif - - int64 result; - - int64 (*gad2l)(int, int, int, double); - gad2l = (int64 ( *)(int, int, int, double) )getaddress__vm_d2l(); - - result = gad2l(0, 0, 0, d); - -#if defined (__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( push ) -#pragma warning (disable:4146)// disable warning 4146: unary minus operator applied to unsigned type, result still unsigned -#endif - // 0x80000000 is the integer indefinite value - if(0x80000000 == *(uint32*)((char*)&result+4)) { - -#ifdef PLATFORM_POSIX - if (isnan(d)) - return 0; -#else - if (_isnan(d)) - return 0; -#endif - - if(d >= (double)(__INT64_C(0x7fffffffffffffff))) { - return __INT64_C(0x7fffffffffffffff); // maxint - } else if(d < (double)-__INT64_C(0x8000000000000000)) { - return -__INT64_C(0x8000000000000000); // minint - } else { - ABORT("The above should exhaust all possibilities"); - return result; - } - - } else { - return result; - } - -#if defined (__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( pop ) -#endif -} //vm_d2l - -static int32 f2i_infinite(float f) -{ -#ifdef __INTEL_COMPILER -#pragma warning(disable: 4146) -#endif - if(_isnan(f)) { - return 0; - } else if(f > (double)2147483647) { - return 2147483647; // maxint - } else if(f < (double)(-2147483647-1)) { - return (-2147483647-1); // minint - } else { - ABORT("The above should exhaust all possibilities"); - return 0; - } -#ifdef __INTEL_COMPILER -#pragma warning(default: 4146) -#endif -} - -void *getaddress__vm_f2i() -{ - static void *addr = 0; - if (addr) { - return addr; - } - - const int stub_size = 64; 
- char *stub = (char *)malloc_fixed_code_for_jit(stub_size, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate); -#ifdef _DEBUG - memset(stub, 0xcc /*int 3*/, stub_size); -#endif - char *ss = stub; - - ss = fld(ss, M_Base_Opnd(esp_reg, 4), 0); - ss = wait(ss); //I dont think this is necessary, cuz fist will guarantee the context - ss = fnstcw(ss, M_Base_Opnd(esp_reg, -8) ); - ss = mov(ss, eax_opnd, M_Base_Opnd(esp_reg, -8), size_16); - ss = alu(ss, or_opc, eax_opnd, Imm_Opnd(size_16, 0xc00), size_16); - ss = mov(ss, M_Base_Opnd(esp_reg, -0xc), eax_opnd, size_16); - ss = fldcw(ss, M_Base_Opnd(esp_reg, -0xc) ); - ss = fist(ss, M_Base_Opnd(esp_reg, -4), false, true); - ss = fldcw(ss, M_Base_Opnd(esp_reg, -8) ); - ss = mov(ss, eax_opnd, M_Base_Opnd(esp_reg, -4) ); - ss = alu(ss, cmp_opc, eax_opnd, Imm_Opnd(0x80000000) ); - ss = branch8(ss, Condition_Z, Imm_Opnd(size_8, 0)); - char *backpatch_address__infinite = ((char *)ss) - 1; - ss = ret(ss, Imm_Opnd(4)); - - signed offset=0; - - offset = (signed)ss-(signed)backpatch_address__infinite - 1; - *backpatch_address__infinite = (char)offset; - ss = push(ss, M_Base_Opnd(esp_reg, 4)); - ss = call(ss, (char *)f2i_infinite); - ss = alu(ss, add_opc, esp_opnd, Imm_Opnd(4)); - ss = ret(ss, Imm_Opnd(4)); - - addr = stub; - assert((ss - stub) <= stub_size); - - compile_add_dynamic_generated_code_chunk("vm_f2i", false, stub, stub_size); - - if (jvmti_should_report_event(JVMTI_EVENT_DYNAMIC_CODE_GENERATED)) { - jvmti_send_dynamic_code_generated_event("vm_f2i", stub, stub_size); - } - - DUMP_STUB(stub, "getaddress__vm_f2i", ss - stub); - - return addr; -} //getaddress__vm_f2i - -static void *getaddress__vm_f2l() -{ - static void *addr = 0; - if (addr) { - return addr; - } - - const int stub_size = 100; - char *stub = (char *)malloc_fixed_code_for_jit(stub_size, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate); -#ifdef _DEBUG - memset(stub, 0xcc /*int 3*/, stub_size); -#endif - char *ss = stub; - - ss = 
push(ss, ebp_opnd); - ss = mov(ss, ebp_opnd, esp_opnd); - ss = push(ss, ebx_opnd); - ss = push(ss, esi_opnd); - ss = push(ss, edi_opnd); - - ss = fld(ss, M_Base_Opnd(ebp_reg, +0x14), 0); - ss = wait(ss); - ss = fnstcw(ss, M_Base_Opnd(ebp_reg, +8) ); - ss = mov(ss, eax_opnd, M_Base_Opnd(ebp_reg, +8), size_16); - ss = alu(ss, or_opc, eax_opnd, Imm_Opnd(size_16, 0xc7f), size_16); - ss = mov(ss, M_Base_Opnd(ebp_reg, +0x0c), eax_opnd, size_16); - - ss = fldcw(ss, M_Base_Opnd(ebp_reg, +0x0c) ); - ss = fist(ss, M_Base_Opnd(ebp_reg, +0x0c), true, true); - ss = fldcw(ss, M_Base_Opnd(ebp_reg, +8) ); - - ss = mov(ss, eax_opnd, M_Base_Opnd(ebp_reg, +0x0c) ); - ss = mov(ss, edx_opnd, M_Base_Opnd(ebp_reg, +0x10) ); - - ss = pop(ss, edi_opnd); - ss = pop(ss, esi_opnd); - ss = pop(ss, ebx_opnd); - ss = pop(ss, ebp_opnd); - - ss = ret(ss); - - addr = stub; - assert((ss - stub) < stub_size); - - compile_add_dynamic_generated_code_chunk("vm_f2l", false, stub, stub_size); - - if (jvmti_should_report_event(JVMTI_EVENT_DYNAMIC_CODE_GENERATED)) { - jvmti_send_dynamic_code_generated_event("vm_f2l", stub, stub_size); - } - - DUMP_STUB(stub, "getaddress__vm_f2l", ss - stub); - - return addr; -} //getaddress__vm_f2l - - -static int64 __stdcall vm_f2l(float f); - -static int64 __stdcall vm_f2l(float f) -{ - assert(!hythread_is_suspend_enabled()); - -#ifdef VM_STATS - VM_Statistics::get_vm_stats().num_f2l++; -#endif - - int64 result; - - int64 (*gaf2l)(int, int, int, float); - gaf2l = (int64 ( *)(int, int, int, float) )getaddress__vm_f2l(); - - result = gaf2l(0, 0, 0, f); - -#if defined (__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( push ) -#pragma warning (disable:4146)// disable warning 4146: unary minus operator applied to unsigned type, result still unsigned -#endif - // 0x80000000 is the integer indefinite value - if(0x80000000 == *(uint32*)((char*)&result+4)) { - if(_isnan(f)) { - return 0; - } else if(f >= __INT64_C(0x7fffffffffffffff) ) { - return 
__INT64_C(0x7fffffffffffffff); // maxint - } else if(f < (double)__INT64_C(-0x8000000000000000) ) { - return __INT64_C(-0x8000000000000000); // minint - } else { - ABORT("The above should exhaust all possibilities"); - return result; - } - } else { - return result; - } -#if defined (__INTEL_COMPILER) || defined (_MSC_VER) -#pragma warning( pop ) -#endif -} //vm_f2l - - -// -// If fprem succeeds in producing a remainder that is less than the -// modulus, the function is complete and the C2 flag is cleared. -// Otherwise, C2 is set, and the result on the top of the fp stack -// is the partial remainder. We need to re-execute the fprem instruction -// (using the partial remainder) until C2 is cleared. -// - - -void *getaddress__vm_frem() -{ - static void *addr = 0; - if (addr) { - return addr; - } - - const int stub_size = 24; - char *stub = (char *)malloc_fixed_code_for_jit(stub_size, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate); -#ifdef _DEBUG - memset(stub, 0xcc /*int 3*/, stub_size); -#endif - char *ss = stub; - - ss = fld(ss, M_Base_Opnd(esp_reg, 4), 0); - ss = fld(ss, M_Base_Opnd(esp_reg, 8), 0); - -//rem_not_complete: - int rem_not_complete = (int)ss; - - ss = fprem(ss); - ss = fnstsw(ss); - ss = alu(ss, and_opc, eax_opnd, Imm_Opnd(size_16, 0x400), size_16); - - int offset = rem_not_complete - (int)ss - 2; - ss = branch8(ss, Condition_NZ, Imm_Opnd(size_8, offset)); // jne rem_not_complete - - ss = fst(ss, 1, true); - - ss = ret(ss, Imm_Opnd(8)); - - addr = stub; - assert((ss - stub) < stub_size); - - compile_add_dynamic_generated_code_chunk("vm_frem", false, stub, stub_size); - - if (jvmti_should_report_event(JVMTI_EVENT_DYNAMIC_CODE_GENERATED)) { - jvmti_send_dynamic_code_generated_event("vm_frem", stub, stub_size); - } - - DUMP_STUB(stub, "getaddress__vm_frem", ss - stub); - - return addr; -} //getaddress__vm_frem - - - -void *getaddress__vm_drem() -{ - static void *addr = 0; - if (addr) { - return addr; - } - - const int stub_size = 24; - 
char *stub = (char *)malloc_fixed_code_for_jit(stub_size, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate); -#ifdef _DEBUG - memset(stub, 0xcc /*int 3*/, stub_size); -#endif - char *ss = stub; - - ss = fld(ss, M_Base_Opnd(esp_reg, 4), 1); // 2nd arg: denominator - ss = fld(ss, M_Base_Opnd(esp_reg, 12), 1); // 1st arg: numerator - // at this point, st0 has numerator, st1 has denominator - -//rem_not_complete: - int rem_not_complete = (int)ss; - - ss = fprem(ss); - ss = fnstsw(ss); - ss = alu(ss, and_opc, eax_opnd, Imm_Opnd(size_16, 0x400), size_16); - - int offset = rem_not_complete - (int)ss - 2; - ss = branch8(ss, Condition_NZ, Imm_Opnd(size_8, offset)); // jne rem_not_complete - - // st0 has the result, st1 has the original denominator - // Need to pop st(1) before returning. - ss = fst(ss, 1, true); - - ss = ret(ss, Imm_Opnd(0x10)); - - addr = stub; - assert((ss - stub) < stub_size); - - compile_add_dynamic_generated_code_chunk("vm_drem", false, stub, stub_size); - - if (jvmti_should_report_event(JVMTI_EVENT_DYNAMIC_CODE_GENERATED)) { - jvmti_send_dynamic_code_generated_event("vm_drem", stub, stub_size); - } - - DUMP_STUB(stub, "getaddress__vm_drem", ss - stub); - - return addr; -} //getaddress__vm_drem - -#ifdef VM_STATS // exclude remark in release mode (defined but not used) -// Return the log base 2 of the integer operand. If the argument is less than or equal to zero, return zero. -static int get_log2(int value) -{ - register int n = value; - register int result = 0; - - while (n > 1) { - n = n >> 1; - result++; - } - return result; -} //get_log2 -#endif - -static void vm_rt_char_arraycopy_no_exc(ManagedObject* src, - int32 srcOffset, - ManagedObject* dst, - int32 dstOffset, - int32 length) -{ - // 20030303 Use a C loop to (hopefully) speed up short array copies. - - // Check that the array references are non-null. - assert(src && dst); - // Check that the arrays are arrays of 16 bit characters. 
- Class * UNUSED src_class = src->vt()->clss; - assert(src_class); - Class * UNUSED dst_class = dst->vt()->clss; - assert(dst_class); - assert(src_class->is_array() && (dst_class->is_array())); - assert(src_class->is_array_of_primitives() && dst_class->is_array_of_primitives()); - assert(strcmp(src_class->get_name()->bytes, "[C") == 0); - assert(strcmp(dst_class->get_name()->bytes, "[C") == 0); - // Check the offsets - assert(srcOffset >= 0); - assert(dstOffset >= 0); - assert(length >= 0); - assert((srcOffset + length) <= get_vector_length((Vector_Handle)src)); - assert((dstOffset + length) <= get_vector_length((Vector_Handle)dst)); - - tmn_suspend_disable(); // vvvvvvvvvvvvvvvvvvv - - register uint16 *dst_addr = get_vector_element_address_uint16(dst, dstOffset); - register uint16 *src_addr = get_vector_element_address_uint16(src, srcOffset); - -#ifdef VM_STATS - VM_Statistics::get_vm_stats().num_char_arraycopies++; - if (dst_addr == src_addr) { - VM_Statistics::get_vm_stats().num_same_array_char_arraycopies++; - } - if (srcOffset == 0) { - VM_Statistics::get_vm_stats().num_zero_src_offset_char_arraycopies++; - } - if (dstOffset == 0) { - VM_Statistics::get_vm_stats().num_zero_dst_offset_char_arraycopies++; - } - if ((((POINTER_SIZE_INT)dst_addr & 0x7) == 0) && (((POINTER_SIZE_INT)src_addr & 0x7) == 0)) { - VM_Statistics::get_vm_stats().num_aligned_char_arraycopies++; - } - VM_Statistics::get_vm_stats().total_char_arraycopy_length += length; - VM_Statistics::get_vm_stats().char_arraycopy_count[get_log2(length)]++; -#endif //VM_STATS - - // 20030219 The length threshold 32 here works well for SPECjbb and should be reasonable for other applications. 
- if (length < 32) { - register int i; - if (src_addr > dst_addr) { - for (i = length; i > 0; i--) { - *dst_addr++ = *src_addr++; - } - } else { - // copy down, from higher address to lower - src_addr += length-1; - dst_addr += length-1; - for (i = length; i > 0; i--) { - *dst_addr-- = *src_addr--; - } - } - } else { - memmove(dst_addr, src_addr, (length * sizeof(uint16))); - } - - tmn_suspend_enable(); // ^^^^^^^^^^^^^^^^^^^ -} //vm_rt_char_arraycopy_no_exc - - -void *get_generic_rt_support_addr_ia32(VM_RT_SUPPORT f) -{ - switch(f) { - case VM_RT_F2I: - return getaddress__vm_f2i(); - case VM_RT_F2L: - return (void *)vm_f2l; - case VM_RT_D2I: - return getaddress__vm_d2i(); - case VM_RT_D2L: - return (void *)vm_d2l; - case VM_RT_FREM: - return getaddress__vm_frem(); - case VM_RT_DREM: - return getaddress__vm_drem(); - - case VM_RT_CHAR_ARRAYCOPY_NO_EXC: - return (void *)vm_rt_char_arraycopy_no_exc; - - default: - ABORT("Unexpected helper id"); - return 0; - } -} Index: vm/vmcore/src/util/ia32/base/jit_runtime_support_ia32.cpp =================================================================== --- vm/vmcore/src/util/ia32/base/jit_runtime_support_ia32.cpp (revision 637959) +++ vm/vmcore/src/util/ia32/base/jit_runtime_support_ia32.cpp (working copy) @@ -1083,14 +1083,6 @@ case VM_RT_THROW_LINKING_EXCEPTION: return getaddress__vm_throw_linking_exception_naked(); - case VM_RT_F2I: - case VM_RT_F2L: - case VM_RT_D2I: - case VM_RT_D2L: - case VM_RT_FREM: - case VM_RT_DREM: - case VM_RT_CHAR_ARRAYCOPY_NO_EXC: - return get_generic_rt_support_addr_ia32(f); case VM_RT_GC_HEAP_WRITE_REF: return (void*)gc_heap_slot_write_ref; default: Index: vm/vmcore/src/util/ipf/base/jit_runtime_support_ipf.cpp =================================================================== --- vm/vmcore/src/util/ipf/base/jit_runtime_support_ipf.cpp (revision 637959) +++ vm/vmcore/src/util/ipf/base/jit_runtime_support_ipf.cpp (working copy) @@ -80,11 +80,6 @@ unsigned reg); ///////// begin arithmetic 
helpers - -float vm_rt_frem(float a, float b); -double vm_rt_drem(double a, double b); -int32 vm_rt_f2i(float f); -int64 vm_rt_f2l(float f); void *get_vm_rt_int_div_address_compactor(void *func, char *stub_name); ///////// end arithmetic helpers @@ -1646,552 +1641,7 @@ } -static void collect_char_arraycopy_stats(ManagedObject *src, - int64 srcOffset, - ManagedObject *dst, - int64 dstOffset, - int64 length); -static void *gen_faster_char_arraycopy_no_exc() -{ - static void *addr = NULL; - if (addr != NULL) - return addr; - - tl::MemoryPool mem_pool; - Merced_Code_Emitter emitter(mem_pool, 2, 11); - emitter.disallow_instruction_exchange(); - emitter.memory_type_is_unknown(); - - emitter.ipf_sxt(sxt_size_4, IN_REG1, IN_REG1); - emitter.ipf_sxt(sxt_size_4, IN_REG3, IN_REG3); - emitter.ipf_sxt(sxt_size_4, IN_REG4, IN_REG4); - - // There are 5 arguments: src, srcOffset, dst, dstOffset, length - // - // p1,p2 = (length == 0) - // p3,p4 = (length >= 32) - // (p1) br.ret - // p5,p6 = (length == 1) - // sc1 = srcOffset<<1 + src - // sc2 = dstOffset<<1 + dst - // sc7 = length - 2 - // sc3 = ar.lc - // (p3) br mem_cpy - // - // p1,p2 = (dstOffset > srcOffset) - // ar.lc = sc7 - // (p1) br copyDown - // - // sc4 = sc1 + first_element_offset - // sc5 = sc2 + first_element_offset - // sc6 = [sc4], 2 - // (p5) br.cond loop1Exit - // loop1: - // [sc5],2 = sc6 - // sc6 = [sc4], 2 - // br.loopc loop1 - // loop1Exit: - // [sc5],2 = sc6 - // ar.lc = sc3 - // br.ret - // - // copyDown: - // sc4 = sc1 + first_element_offset-2 - // sc5 = sc2 + first_element_offset-2 - // sc4 = length<<1 + sc4 - // sc5 = length<<1 + sc5 - // sc6 = [sc4], -2 - // (p5) br.cond loop2Exit - // loop2: - // [sc5],-2 = sc6 - // sc6 = [sc4], -2 - // br.cloop loop2 - // loop2Exit: - // [sc5],-2 = sc6 - // ar.lc = sc3 - // br.ret - // - // mem_cpy: - // alloc, save b0, save gp - // br.call memmove(dst, src, length*2) - // restore b0, gp, pfs - // br.ret - - void (*p_collect_char_arraycopy_stats)( - ManagedObject 
*src, - int64 srcOffset, - ManagedObject *dst, - int64 dstOffset, - int64 length); - p_collect_char_arraycopy_stats = collect_char_arraycopy_stats; - emit_vm_stats_update_call(emitter, (void **)p_collect_char_arraycopy_stats, 5); - - const int src = IN_REG0; - const int srcOffset = IN_REG1; - const int dst = IN_REG2; - const int dstOffset = IN_REG3; - const int length = IN_REG4; - const int first_element_offset = vector_first_element_offset(VM_DATA_TYPE_CHAR); - const int sc1 = SCRATCH_GENERAL_REG; - const int sc2 = SCRATCH_GENERAL_REG2; - const int sc3 = SCRATCH_GENERAL_REG3; - const int sc4 = SCRATCH_GENERAL_REG4; - const int sc5 = SCRATCH_GENERAL_REG5; - const int sc6 = SCRATCH_GENERAL_REG6; - const int sc7 = SCRATCH_GENERAL_REG7; - const int sc8 = SCRATCH_GENERAL_REG8; - const int sc9 = SCRATCH_GENERAL_REG9; - const int sc10 = SCRATCH_GENERAL_REG10; - const int sc11 = SCRATCH_GENERAL_REG11; - const int sc12 = SCRATCH_GENERAL_REG12; - const int sc13 = SCRATCH_GENERAL_REG13; - const int sc17 = SCRATCH_GENERAL_REG17; - const int sc18 = SCRATCH_GENERAL_REG18; - const int p0 = 0; - const int p1 = SCRATCH_PRED_REG; - const int p2 = SCRATCH_PRED_REG2; - const int p3 = SCRATCH_PRED_REG3; - const int p4 = SCRATCH_PRED_REG4; - const int p5 = SCRATCH_PRED_REG5; - const int p6 = SCRATCH_PRED_REG6; - const int loop1 = 0; - const int loop2 = 1; - const int copyDown = 2; - const int mem_cpy = 3; - const int loop1Exit = 4; - const int loop2Exit = 5; - const int loop3 = 6; - const int mem_cpy_final = 7; -// const int mem_cpy_pre_final = 8; - const int loop1ExitA = 9; - const int loop2ExitA = 10; - - { - // sc1 = srcOffset<<1 + src - // sc2 = dstOffset<<1 + dst - // p3,p0 = (length > 32) - // p4,p0 = (length < 2) - // sc8 = length >> 1 - // (p3) br mem_cpy - // - // p6,p0 = (length < 4) - // sc9 = length & 1 - // p1,p0 = (dstOffset > srcOffset) - // sc18 = length<<1 + sc1 - // sc3 = ar.lc - // sc7 = sc8 - 2 - // - // p2,p0 = (sc9 != 0) - // ar.lc = sc7 - // sc18 = sc18 + 
first_element_offset-2 - // (p1) p1,p0 = cmp.unc (src == dst) - // (p1) br copyDown - // - // sc17 =.2 [sc18] // prefetch - // sc4 = sc1 + first_element_offset - // sc5 = sc2 + first_element_offset - // sc10 = sc1 + (first_element_offset+2) - // sc11 = sc2 + (first_element_offset+2) - // (p4) br.cond loop1ExitA - // - // sc6 =.2 [sc4],4 - // sc12 =.2 [sc10],4 - // (p6) br.cond loop1Exit - // - // loop1: - // [sc5],4 =.2 sc6 - // [sc11],4 =.2 sc12 - // sc6 =.2 [sc4],4 - // sc12 =.2 [sc10],4 - // br.loopc loop1 - // - // loop1Exit: - // [sc5],4 =.2 sc6 - // [sc11],4 =.2 sc12 - // - // loop1ExitA: - // (p2) sc6 =.2 [sc4],4 - // ar.lc = sc3 - // (p2) [sc5],4 =.2 sc6 - // br.ret - // - // copyDown: - // sc4 = sc1 + first_element_offset-2 - // sc5 = sc2 + first_element_offset-2 - // sc10 = sc1 + first_element_offset-4 - // sc11 = sc2 + first_element_offset-4 - // sc18 = sc1 + first_element_offset-2 - // - // sc17 =.2 [sc18] // prefetch - // sc4 = length<<1 + sc4 - // sc5 = length<<1 + sc5 - // sc10 = length<<1 + sc10 - // sc11 = length<<1 + sc11 - // (p4) br.cond loop2ExitA - // - // sc6 =.2 [sc4],-4 - // sc12 =.2 [sc10],-4 - // (p6) br.cond loop2Exit - // - // loop2: - // [sc5],-4 =.2 sc6 - // [sc11],-4 =.2 sc12 - // sc6 =.2 [sc4],-4 - // sc12 =.2 [sc10],-4 - // br.cloop loop2 - // - // loop2Exit: - // [sc5],-4 =.2 sc6 - // [sc11],-4 =.2 sc12 - // - // loop2ExitA: - // (p2) sc6 =.2 [sc4],-4 - // ar.lc = sc3 - // (p2) [sc5],-4 =.2 sc6 - // br.ret - - - // sc1 = srcOffset<<1 + src - // sc2 = dstOffset<<1 + dst - // p3,p0 = (length > 32) - // p4,p0 = (length < 2) - // sc8 = length >> 1 - // (p3) br mem_cpy - emitter.ipf_shladd(sc1, srcOffset, 1, src); - emitter.ipf_shladd(sc2, dstOffset, 1, dst); - emitter.ipf_cmpi(icmp_lt, cmp_none, p3, p0, 32, length); - emitter.ipf_cmpi(icmp_gt, cmp_none, p4, p0, 2, length); - emitter.ipf_shri(sc8, length, 1); - emitter.ipf_br(br_cond, br_few, br_spnt, br_none, mem_cpy, p3); - emitter.flush_buffer(); - - // p6,p0 = (length < 4) - // sc9 
= length & 1 - // p1,p0 = (dstOffset > srcOffset) - // sc18 = length<<1 + sc1 - // sc3 = ar.lc - // sc7 = sc8 - 2 - emitter.ipf_cmpi(icmp_gt, cmp_none, p6, p0, 4, length); - emitter.ipf_andi(sc9, 1, length); - emitter.ipf_cmp(icmp_gt, cmp_none, p1, p0, dstOffset, srcOffset); - emitter.ipf_shladd(sc18, length, 1, sc1); - emitter.ipf_mfap(sc3, AR_lc); - emitter.ipf_adds(sc7, -2, sc8); - emitter.flush_buffer(); - - // p2 = (sc9 != 0) - // ar.lc = sc7 - // sc18 = sc18 + first_element_offset-2 - // (p1) p1,p0 = cmp.unc (src == dst) - // (p1) br copyDown - emitter.ipf_cmp(icmp_ne, cmp_none, p2, p0, sc9, 0); - emitter.ipf_mtap(AR_lc, sc7); - emitter.ipf_adds(sc18, first_element_offset-2, sc18); - emitter.ipf_cmp(icmp_eq, cmp_unc, p1, p0, src, dst, false, p1); - emitter.ipf_br(br_cond, br_few, br_spnt, br_none, copyDown, p1); - emitter.flush_buffer(); - - // sc17 =.2 [sc18] // prefetch - // sc4 = sc1 + first_element_offset - // sc5 = sc2 + first_element_offset - // sc10 = sc1 + (first_element_offset+2) - // sc11 = sc2 + (first_element_offset+2) - // (p4) br.cond loop1ExitA - emitter.ipf_ld(int_mem_size_2, mem_ld_none, mem_none, sc18, sc18); - emitter.ipf_adds(sc4, first_element_offset, sc1); - emitter.ipf_adds(sc5, first_element_offset, sc2); - emitter.ipf_adds(sc10, first_element_offset+2, sc1); - emitter.ipf_adds(sc11, first_element_offset+2, sc2); - emitter.ipf_br(br_cond, br_few, br_spnt, br_none, loop1ExitA, p4); - emitter.flush_buffer(); - - // sc6 =.2 [sc4],4 - // sc12 =.2 [sc10],4 - // (p6) br.cond loop1Exit - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc6, sc4, 4); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc12, sc10, 4); - emitter.ipf_br(br_cond, br_few, br_spnt, br_none, loop1Exit, p6); - emitter.flush_buffer(); - - // loop1: - // [sc5],4 =.2 sc6 - // [sc11],4 =.2 sc12 - // sc6 =.2 [sc4],4 - // sc12 =.2 [sc10],4 - // br.cloop loop1 - emitter.set_target(loop1); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, 
mem_none, sc5, sc6, 4); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc11, sc12, 4); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc6, sc4, 4); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc12, sc10, 4); - emitter.ipf_br(br_cloop, br_few, br_sptk, br_none, loop1); - - // loop1Exit: - // [sc5],4 =.2 sc6 - // [sc11],4 =.2 sc12 - emitter.set_target(loop1Exit); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc5, sc6, 4); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc11, sc12, 4); - - // loop1ExitA: - // (p2) sc6 =.2 [sc4],4 - // ar.lc = sc3 - // (p2) [sc5],4 =.2 sc6 - // br.ret - emitter.set_target(loop1ExitA); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc6, sc4, 4, p2); - emitter.ipf_mtap(AR_lc, sc3); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc5, sc6, 4, p2); - emitter.ipf_brret(br_many, br_sptk, br_none, BRANCH_RETURN_LINK_REG); - - // copyDown: - // sc4 = sc1 + first_element_offset-2 - // sc5 = sc2 + first_element_offset-2 - // sc10 = sc1 + first_element_offset-4 - // sc11 = sc2 + first_element_offset-4 - // sc18 = sc1 + first_element_offset-2 - emitter.set_target(copyDown); - emitter.ipf_adds(sc4, first_element_offset-2, sc1); - emitter.ipf_adds(sc5, first_element_offset-2, sc2); - emitter.ipf_adds(sc10, first_element_offset-4, sc1); - emitter.ipf_adds(sc11, first_element_offset-4, sc2); - emitter.ipf_adds(sc18, first_element_offset-2, sc1); - emitter.flush_buffer(); - - // sc17 =.2 [sc18] // prefetch - // sc4 = length<<1 + sc4 - // sc5 = length<<1 + sc5 - // sc10 = length<<1 + sc10 - // sc11 = length<<1 + sc11 - // (p4) br.cond loop2ExitA - emitter.ipf_ld(int_mem_size_2, mem_ld_none, mem_none, sc17, sc18); - emitter.ipf_shladd(sc4, length, 1, sc4); - emitter.ipf_shladd(sc5, length, 1, sc5); - emitter.ipf_shladd(sc10, length, 1, sc10); - emitter.ipf_shladd(sc11, length, 1, sc11); - emitter.ipf_br(br_cond, br_few, br_spnt, 
br_none, loop2ExitA, p4); - emitter.flush_buffer(); - - // sc6 =.2 [sc4],-4 - // sc12 =.2 [sc10],-4 - // (p6) br.cond loop2Exit - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc6, sc4, unsigned(-4)); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc12, sc10, unsigned(-4)); - emitter.ipf_br(br_cond, br_few, br_spnt, br_none, loop2Exit, p6); - // - // loop2: - // [sc5],-4 =.2 sc6 - // [sc11],-4 =.2 sc12 - // sc6 =.2 [sc4],-4 - // sc12 =.2 [sc10],-4 - // br.cloop loop2 - emitter.set_target(loop2); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc5, sc6, unsigned(-4)); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc11, sc12, unsigned(-4)); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc6, sc4, unsigned(-4)); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc12, sc10, unsigned(-4)); - emitter.ipf_br(br_cloop, br_few, br_sptk, br_none, loop2); - - // loop2Exit: - // [sc5],-4 =.2 sc6 - // [sc11],-4 =.2 sc12 - emitter.set_target(loop2Exit); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc5, sc6, unsigned(-4)); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc11, sc12, unsigned(-4)); - - // loop2ExitA: - // (p2) sc6 =.2 [sc4],-4 - // ar.lc = sc3 - // (p2) [sc5],-4 =.2 sc6 - // br.ret - emitter.set_target(loop2ExitA); - emitter.ipf_ld_inc_imm(int_mem_size_2, mem_ld_none, mem_none, sc6, sc4, unsigned(-4), p2); - emitter.ipf_mtap(AR_lc, sc3); - emitter.ipf_st_inc_imm(int_mem_size_2, mem_st_none, mem_none, sc5, sc6, unsigned(-4), p2); - emitter.ipf_brret(br_many, br_sptk, br_none, BRANCH_RETURN_LINK_REG); - } - - // mem_cpy: - // p1,p0 = cmp.ne srcOffset, 0 - // sc8 = andi 3, length - // sc9 = shri 2, length - // sc5 = src + first_element_offset - // sc3 = ar.lc - // sc10 = 128 - - // sc7 =.8 [sc5],8 - // sc4 = sc9 - 2 - // p2,p0 = cmp.ne dstOffset, 0 - // p3,p0 = cmp.gt length, sc10 - // p4,p0 = cmp.ne sc8, 0 - // (p1) br.cond 
mem_cpy_final - // - // nop - // ar.lc = sc4 - // sc6 = dst + first_element_offset - // (p2) br.cond mem_cpy_pre_final - // (p3) br.cond mem_cpy_pre_final - // (p4) br.cond mem_cpy_pre_final - // - // loop3: - // [sc6],8 =.8 sc7 - // sc7 =.8 [sc5],8 - // br.cloop loop3 - // - // [sc6],8 =.8 sc7 - // ar.lc = sc3 - // br.ret - - // mem_cpy_pre_final: - // ar.lc = sc3 - // - // mem_cpy_final: - // ... - // - emitter.set_target(mem_cpy); - { - // p1,p0 = cmp.ne srcOffset, 0 - // sc9 = shri 3, length - // p2,p0 = cmp.ne dstOffset, 0 - // sc8 = andi 7, length - // (p1) br.cond mem_cpy_final - // (p2) br.cond mem_cpy_final - // - // sc5 = src + first_element_offset - // sc18 = src + first_element_offset+64 - // p3,p0 = cmp.lt 16, sc9 - // p4,p0 = cmp.ne sc8, 0 - // (p3) br.cond mem_cpy_final - // (p4) br.cond mem_cpy_final - // - // sc11 = src + (first_element_offset+8) - // sc3 = ar.lc - // sc4 = sc9 - 2 - // sc7 =.8 [sc5],16 - // sc17 =.8 [sc18],64 // prefetch - // p5,p0 = cmp.le 64, length - // - // sc12 = dst + (first_element_offset+8) - // ar.lc = sc4 - // p6,p0 = cmp.le 96, length - // sc13 =.8 [sc11],16 - // (p5) sc17 =.8 [sc18],64 // prefetch - // sc6 = dst + first_element_offset - // - // loop3: - // [sc6],16 =.8 sc7 - // [sc12],16 =.8 sc13 - // sc7 =.8 [sc5],16 - // sc13 =.8 [sc11],16 - // br.cloop loop3 - // - // [sc6],16 =.8 sc7 - // [sc12],16 =.8 sc13 - // ar.lc = sc3 - // br.ret - - - // p1,p0 = cmp.ne srcOffset, 0 - // sc9 = shri 3, length - // p2,p0 = cmp.ne dstOffset, 0 - // sc8 = andi 7, length - // (p1) br.cond mem_cpy_final - // (p2) br.cond mem_cpy_final - emitter.ipf_cmp(icmp_ne, cmp_none, p1, p0, srcOffset, 0); - emitter.ipf_shri(sc9, length, 3); - emitter.ipf_cmp(icmp_ne, cmp_none, p2, p0, dstOffset, 0); - emitter.ipf_andi(sc8, 7, length); - emitter.ipf_br(br_cond, br_many, br_spnt, br_none, mem_cpy_final, p1); - emitter.ipf_br(br_cond, br_many, br_spnt, br_none, mem_cpy_final, p2); - emitter.flush_buffer(); - - // sc5 = src + first_element_offset 
- // sc18 = src + first_element_offset+64 - // p3,p0 = cmp.lt 16, sc9 - // p4,p0 = cmp.ne sc8, 0 - // (p3) br.cond mem_cpy_final - // (p4) br.cond mem_cpy_final - emitter.ipf_adds(sc5, first_element_offset, src); - emitter.ipf_adds(sc18, first_element_offset+64, src); - emitter.ipf_cmpi(icmp_lt, cmp_none, p3, p0, 16, sc9); - emitter.ipf_cmp(icmp_ne, cmp_none, p4, p0, sc8, 0); - emitter.ipf_br(br_cond, br_many, br_spnt, br_none, mem_cpy_final, p3); - emitter.ipf_br(br_cond, br_many, br_spnt, br_none, mem_cpy_final, p4); - emitter.flush_buffer(); - - // sc11 = src + (first_element_offset+8) - // sc3 = ar.lc - // sc4 = sc9 - 2 - // sc7 =.8 [sc5],16 - // sc17 =.8 [sc18],64 // prefetch - // p5,p0 = cmp.le 64, length - emitter.ipf_adds(sc11, first_element_offset+8, src); - emitter.ipf_mfap(sc3, AR_lc); - emitter.ipf_adds(sc4, -2, sc9); - emitter.ipf_ld_inc_imm(int_mem_size_8, mem_ld_none, mem_none, sc7, sc5, 16); - emitter.ipf_ld_inc_imm(int_mem_size_8, mem_ld_none, mem_none, sc17, sc18, 64); - emitter.ipf_cmpi(icmp_le, cmp_none, p5, p0, 64, length); - emitter.flush_buffer(); - - emitter.ipf_adds(sc12, first_element_offset+8, dst); - emitter.ipf_mtap(AR_lc, sc4); - emitter.ipf_cmpi(icmp_le, cmp_none, p6, p0, 96, length); - emitter.ipf_ld_inc_imm(int_mem_size_8, mem_ld_none, mem_none, sc13, sc11, 16); - emitter.ipf_ld_inc_imm(int_mem_size_8, mem_ld_none, mem_none, sc17, sc18, 64, p5); - emitter.ipf_adds(sc6, first_element_offset, dst); - emitter.flush_buffer(); - - emitter.set_target(loop3); - emitter.ipf_st_inc_imm(int_mem_size_8, mem_st_none, mem_none, sc6, sc7, 16); - emitter.ipf_st_inc_imm(int_mem_size_8, mem_st_none, mem_none, sc12, sc13, 16); - emitter.ipf_ld_inc_imm(int_mem_size_8, mem_ld_none, mem_none, sc7, sc5, 16); - emitter.ipf_ld_inc_imm(int_mem_size_8, mem_ld_none, mem_none, sc13, sc11, 16); - emitter.ipf_br(br_cloop, br_few, br_sptk, br_none, loop3); - - emitter.ipf_st_inc_imm(int_mem_size_8, mem_st_none, mem_none, sc6, sc7, 16); - 
emitter.ipf_st_inc_imm(int_mem_size_8, mem_st_none, mem_none, sc12, sc13, 16); - emitter.ipf_mtap(AR_lc, sc3); - emitter.ipf_brret(br_many, br_sptk, br_none, BRANCH_RETURN_LINK_REG); - } - - // mem_cpy: - // alloc, save b0, save gp - // br.call memmove(dst, src, length*2) - // restore b0, gp, pfs - // br.ret - emitter.set_target(mem_cpy_final); - int out0, save_pfs, save_b0, save_gp; - const int num_in_args = 5, num_out_args = 3; - void *(*p_memmove)(void *dst, const void *src, size_t length); - p_memmove = memmove; - emit_alloc_for_single_call(emitter, num_in_args, num_out_args, - (void **)p_memmove, - out0, save_pfs, save_b0, save_gp); - emitter.ipf_shladd(sc1, srcOffset, 1, src); - emitter.ipf_shladd(sc2, dstOffset, 1, dst); - emitter.ipf_adds(out0+0, first_element_offset, sc2); - emitter.ipf_adds(out0+1, first_element_offset, sc1); - emitter.ipf_add(out0+2, length, length); - emit_call_with_gp(emitter, (void **)p_memmove, false); - - // Restore pfs, b0, and gp - emit_dealloc_for_single_call(emitter, save_pfs, save_b0, save_gp); - emitter.ipf_brret(br_many, br_sptk, br_none, BRANCH_RETURN_LINK_REG); - - emitter.flush_buffer(); - size_t size = emitter.get_size(); - - addr = (void *)malloc_fixed_code_for_jit(size, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_MAX/2, CAA_Allocate); - emitter.copy((char *)addr); - flush_hw_cache((Byte *)addr, size); - sync_i_cache(); - - - return addr; -} - - #ifdef VM_STATS static unsigned compute_index(int64 value, bool is_length) { @@ -2205,92 +1655,6 @@ } #endif // VM_STATS -static void collect_char_arraycopy_stats(ManagedObject *src, - int64 srcOffset, - ManagedObject *dst, - int64 dstOffset, - int64 length) -{ -#ifdef VM_STATS - // 2003-07-24. The following code helps to find the distribution of parameters - // to the char arraycopy routine. It is useful in the debugger only. 
-#define COUNT_MAX 128 - static unsigned srcOffsetCount[COUNT_MAX], dstOffsetCount[COUNT_MAX], lengthCount[COUNT_MAX]; - static unsigned src0_dst0_lengthEven, src0_dstEven_lengthEven, srcEven_dstEven_lengthEven; - static unsigned lengthShort; - static unsigned srcOffset80[COUNT_MAX], dstOffset80[COUNT_MAX]; - static unsigned iteration = 0; - static unsigned properties[4][4][4]; - iteration ++; - if (iteration >= 1000000) - { - int i,j,k; - for (i=0; i (double)2147483647) { - result = 2147483647; // maxint - } else if(f < -(double)2147483648) { - result = 0x80000000; // minint - } else { - // The above should exhaust all possibilities - result = (int32)f; - } - return result; -} //vm_rt_f2i - - - -int64 vm_rt_f2l(float f) -{ - printf("vm_rt_f2l hasn't been tested yet\n"); - int64 result; - if(_isnan(f)) { - result = 0; - } else if(f > (double)0x7fffffffffffffff) { - result = 0x7fffffffffffffff; // maxint - } else if(f < -(double)0x8000000000000000) { - result = 0x8000000000000000; // minint - } else { - // The above should exhaust all possibilities - result = (int64)f; - } - return result; -} //vm_rt_f2l - - Index: vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp =================================================================== --- vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp (revision 637959) +++ vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp (working copy) @@ -85,7 +85,7 @@ // Changed handleInst_MOV(Inst* inst); Changed handleInst_Call(Inst* inst); - Changed handleInst_HelperCall(Inst* inst, const Opnd::RuntimeInfo* ri); + Changed handleInst_InternalHelperCall(Inst* inst, const Opnd::RuntimeInfo* ri); Changed handleInst_Convert_F2I_D2I(Inst* inst); Changed handleInst_ALU(Inst* inst); Changed handleInst_MUL(Inst* inst); @@ -241,25 +241,24 @@ rt_kind = ri->getKind(); } - if (Opnd::RuntimeInfo::Kind_HelperAddress == rt_kind) { - return handleInst_HelperCall(inst, ri); + if (Opnd::RuntimeInfo::Kind_InternalHelperAddress == rt_kind) { + return 
handleInst_InternalHelperCall(inst, ri); } return Changed_Nothing; } -PeepHoleOpt::Changed PeepHoleOpt::handleInst_HelperCall( +PeepHoleOpt::Changed PeepHoleOpt::handleInst_InternalHelperCall( // picks a peephole by internal helper name Inst* inst, const Opnd::RuntimeInfo* ri) { - assert(Opnd::RuntimeInfo::Kind_HelperAddress == ri->getKind()); - void* rt_data = ri->getValue(0); - POINTER_SIZE_INT helperId = (POINTER_SIZE_INT)rt_data; - switch(helperId) { - case VM_RT_F2I: - case VM_RT_D2I: + assert(Opnd::RuntimeInfo::Kind_InternalHelperAddress == ri->getKind()); + /** The value of the operand is irManager.getInternalHelperInfo((const char*)[0]).pfn */ + const char* rt_data = (const char*)(ri->getValue(0)); + + // Both FP-to-int32 conversion helpers share one peephole handler. + if (strcmp(rt_data,"convF4I4") == 0 || + strcmp(rt_data,"convF8I4") == 0) { return handleInst_Convert_F2I_D2I(inst); - default: - break; } return Changed_Nothing; } Index: vm/jitrino/src/codegenerator/ia32/Ia32InstCodeSelector.cpp =================================================================== --- vm/jitrino/src/codegenerator/ia32/Ia32InstCodeSelector.cpp (revision 637959) +++ vm/jitrino/src/codegenerator/ia32/Ia32InstCodeSelector.cpp (working copy) @@ -113,6 +113,67 @@ float __stdcall convF8F4 (double v) stdcall__; float __stdcall convF8F4 (double v) { return (float)v; } +int32 __stdcall convF4I4 (float v) stdcall__; +int32 __stdcall convF4I4 (float v) { // float -> int32: NaN yields 0, out-of-range values saturate +#ifdef PLATFORM_POSIX + if (isnan(v)) +#else + if (_isnan(v)) +#endif + return 0; + if (v>(double)(int32)0x7fffffff) + return (int32)0x7fffffff; // maxint + if (v<(double)(int32)0x80000000) + return (int32)0x80000000; // minint + return (int32)v; +} + +int64 __stdcall convF4I8 (float v) stdcall__; +int64 __stdcall convF4I8 (float v) { // float -> int64: NaN yields 0, out-of-range values saturate +#ifdef PLATFORM_POSIX + if (isnan(v)) +#else + if (_isnan(v)) +#endif + return 0; + if (v >= (double)(int64)(__INT64_C(0x7fffffffffffffff))) + return (int64) + __INT64_C(0x7fffffffffffffff); // maxint + else if (v < (double)(int64)__INT64_C(0x8000000000000000)) + return
(int64)__INT64_C(0x8000000000000000); // minint + return (int64)v; +} + +int32 __stdcall convF8I4 (double v) stdcall__; +int32 __stdcall convF8I4 (double v) { // double -> int32: NaN yields 0, values outside the int32 range saturate +#ifdef PLATFORM_POSIX + if (isnan(v)) +#else + if (_isnan(v)) +#endif + return 0; + if (v>(double)(int32)0x7fffffff) + return (int32)0x7fffffff; // maxint + if (v<(double)(int32)0x80000000) + return (int32)0x80000000; // minint + return (int32)v; +} + +int64 __stdcall convF8I8 (double v) stdcall__; +int64 __stdcall convF8I8 (double v) { // double -> int64: NaN yields 0, values at/above the int64 maximum or below the minimum saturate +#ifdef PLATFORM_POSIX + if (isnan(v)) +#else + if (_isnan(v)) +#endif + return 0; + if (v >= (double)(int64)(__INT64_C(0x7fffffffffffffff))) + return (int64)__INT64_C(0x7fffffffffffffff); // maxint + else if (v < (double)(int64)__INT64_C(0x8000000000000000)) + return (int64)__INT64_C(0x8000000000000000); // minint + return (int64)v; +} + double __stdcall convI4F8 (uint32 v) stdcall__; double __stdcall convI4F8 (uint32 v) { return (double)(int32)v; } @@ -165,6 +226,10 @@ // FP conversion internal helpers (temp solution to be optimized) irManager.registerInternalHelperInfo("convF4F8", IRManager::InternalHelperInfo((void*)&convF4F8,&CallingConvention_STDCALL)); irManager.registerInternalHelperInfo("convF8F4", IRManager::InternalHelperInfo((void*)&convF8F4,&CallingConvention_STDCALL)); + irManager.registerInternalHelperInfo("convF4I4", IRManager::InternalHelperInfo((void*)&convF4I4,&CallingConvention_STDCALL)); + irManager.registerInternalHelperInfo("convF4I8", IRManager::InternalHelperInfo((void*)&convF4I8,&CallingConvention_STDCALL)); + irManager.registerInternalHelperInfo("convF8I4", IRManager::InternalHelperInfo((void*)&convF8I4,&CallingConvention_STDCALL)); + irManager.registerInternalHelperInfo("convF8I8", IRManager::InternalHelperInfo((void*)&convF8I8,&CallingConvention_STDCALL)); irManager.registerInternalHelperInfo("convI4F8", IRManager::InternalHelperInfo((void*)&convI4F8,&CallingConvention_STDCALL)); irManager.registerInternalHelperInfo("convI4F4",
IRManager::InternalHelperInfo((void*)&convI4F4,&CallingConvention_STDCALL)); irManager.registerInternalHelperInfo("convI8F8", IRManager::InternalHelperInfo((void*)&convI8F8,&CallingConvention_STDCALL)); @@ -370,21 +435,21 @@ Opnd * InstCodeSelector::convertFpToInt(Opnd * srcOpnd, Type * dstType, Opnd * dstOpnd) { assert(srcOpnd->getType()->isFP() && dstType->isInteger()); - VM_RT_SUPPORT helperId; + const char * helperName; OpndSize dstSize=irManager.getTypeSize(dstType); if (dstSize<=OpndSize_32){ if (dstOpnd==NULL) dstOpnd=irManager.newOpnd(typeManager.getInt32Type()); - helperId=srcOpnd->getType()->isSingle()?VM_RT_F2I:VM_RT_D2I; + helperName=srcOpnd->getType()->isSingle()?"convF4I4":"convF8I4"; }else{ assert(dstSize==OpndSize_64); if (dstOpnd==NULL) dstOpnd=irManager.newOpnd(dstType); - helperId=srcOpnd->getType()->isSingle()?VM_RT_F2L:VM_RT_D2L; + helperName=srcOpnd->getType()->isSingle()?"convF4I8":"convF8I8"; } Opnd * args[] = {srcOpnd}; - appendInsts(irManager.newRuntimeHelperCallInst(helperId, 1, args, dstOpnd)); + appendInsts(irManager.newInternalRuntimeHelperCallInst(helperName, 1, args, dstOpnd)); if (dstSizegetValue())) #define IMM64U(o) ((uint64)(((Opnd *)(o))->getValue())) +// FP remainder internal helpers (temp solution to be optimized) +float remF4 (float v0, float v1); +float remF4 (float v0, float v1) { // single-precision remainder, computed through double fmod + return (float)fmod((double)v0,(double)v1); +} + +double remF8 (double v0, double v1); +double remF8 (double v0, double v1) { // double-precision remainder via fmod + return fmod(v0,v1); +} + +static int32 feOf; // byte offset of element 0 in a char array (from VMInterface::getArrayElemOffset) +static int32 elSz; // size in bytes of one char array element (from VMInterface::getArrayElemSize) +// Copies l 16-bit elements from array s at index sOf to array d at index dOf; overlapping ranges are handled. +void char_arraycopy (void* s, int32 sOf, void* d, int32 dOf, int32 l); +void char_arraycopy (void* s, int32 sOf, void* d, int32 dOf, int32 l) { + // Check that the array references are non-null.
+ assert(s && d); + // Check the offsets + assert(sOf >= 0); + assert(dOf >= 0); + assert(l >= 0); + + uint16* dst_addr = (uint16*)((uint8*)d + feOf + (int64)dOf*elSz); // widen before scaling: dOf*elSz can overflow int32 + uint16* src_addr = (uint16*)((uint8*)s + feOf + (int64)sOf*elSz); + + // 20030219 The length threshold 32 here works well for SPECjbb and should be reasonable for other applications. + if (l < 32) { + int i; + if (src_addr > dst_addr) { // source above destination: ascending copy cannot clobber unread elements + for (i = l; i > 0; i--) { + *dst_addr++ = *src_addr++; + } + } else { + // copy down, from higher address to lower + src_addr += l-1; + dst_addr += l-1; + for (i = l; i > 0; i--) { + *dst_addr-- = *src_addr--; + } + } + } else { + memmove(dst_addr, src_addr, (l * sizeof(uint16))); + } +} + + //===========================================================================// // IpfInstCodeSelector //===========================================================================// @@ -51,6 +98,10 @@ opnds(opnds_), compilationInterface(compilationInterface_) { + + void * vmTypeHandle = compilationInterface.getTypeManager().getCharType()->getVMTypeHandle(); + feOf = VMInterface::getArrayElemOffset(vmTypeHandle, false); + elSz = VMInterface::getArrayElemSize(vmTypeHandle); opndManager = cfg.getOpndManager(); p0 = opndManager->getP0(); } @@ -1315,8 +1366,7 @@ IPF_LOG << " tau_callintr" << endl; - VM_RT_SUPPORT hId = VM_RT_CHAR_ARRAYCOPY_NO_EXC; - uint64 address = (uint64) compilationInterface.getRuntimeHelperAddress(hId); + uint64 address = (uint64) char_arraycopy; Opnd *helperAddress = opndManager->newImm(address); RegOpnd *retOpnd = NULL; @@ -2915,13 +2965,12 @@ addNewInst(INST_FMA, CMPLT_PC_DOUBLE, CMPLT_SF0, pX, fRes, fr, fy3, fq3); } else { - // Call runtime helper to do FP remainder. + // Call internal helper to do FP remainder.
We only inline the integer // remainder sequence // Opnd *helperArgs[] = { (Opnd *)src1, (Opnd *)src2 }; - VM_RT_SUPPORT hId = VM_RT_DREM; - uint64 address = (uint64) compilationInterface.getRuntimeHelperAddress(hId); + uint64 address = (uint64) remF8; Opnd *helperAddress = opndManager->newImm(address); directCall(2, helperArgs, dst, helperAddress, p0); @@ -3030,13 +3079,12 @@ addNewInst(INST_FMA, CMPLT_PC_DOUBLE, CMPLT_SF1, pX, fq3, fq2, fe4, fq2); addNewInst(INST_FNORM, CMPLT_PC_SINGLE, CMPLT_SF0, pX, fRes, fq3); } else { - // Call runtime helper to do FP remainder. We only inline the integer + // Call internal helper to do FP remainder. We only inline the integer // remainder sequence // Opnd *helperArgs[] = { (Opnd *)src1, (Opnd *)src2 }; - VM_RT_SUPPORT hId = VM_RT_FREM; - uint64 address = (uint64) compilationInterface.getRuntimeHelperAddress(hId); + uint64 address = (uint64) remF4; Opnd *helperAddress = opndManager->newImm(address); directCall(2, helperArgs, dst, helperAddress, p0);