Index: vm/include/open/hycomp.h =================================================================== --- vm/include/open/hycomp.h (revision 512349) +++ vm/include/open/hycomp.h (working copy) @@ -112,7 +112,7 @@ /** * Win32 - Windows 3.1 & NT using Win32 */ -#if defined(WIN32) +#if defined(WIN32) || defined(_WIN64) typedef __int64 I_64; typedef unsigned __int64 U_64; Index: vm/port/src/encoder/ia32_em64t/encoder.h =================================================================== --- vm/port/src/encoder/ia32_em64t/encoder.h (revision 512349) +++ vm/port/src/encoder/ia32_em64t/encoder.h (working copy) @@ -41,14 +41,38 @@ #include "enc_base.h" #include "open/types.h" -#ifdef _EM64T_ +#ifdef _EM64T_ // size of general-purpose value on the stack in bytes #define GR_STACK_SIZE 8 // size of floating-point value on the stack in bytes #define FR_STACK_SIZE 8 + +#if defined(WIN32) || defined(_WIN64) + // maximum number of GP registers for inputs + const int MAX_GR = 4; + // maximum number of FP registers for inputs + const int MAX_FR = 4; + // WIN64 reserves 4 words for shadow space + const int SHADOW = 4 * GR_STACK_SIZE; #else + // maximum number of GP registers for inputs + const int MAX_GR = 6; + // maximum number of FP registers for inputs + const int MAX_FR = 8; + // Linux x64 doesn't reserve shadow space + const int SHADOW = 0; +#endif + +#else +// size of general-purpose value on the stack in bytes #define GR_STACK_SIZE 4 +// size of floating-point value on the stack in bytes #define FR_STACK_SIZE 8 + +// maximum number of GP registers for inputs +const int MAX_GR = 0; +// maximum number of FP registers for inputs +const int MAX_FR = 0; #endif enum Reg_No { Index: vm/port/src/lil/em64t/pim/m2n_em64t.cpp =================================================================== --- vm/port/src/lil/em64t/pim/m2n_em64t.cpp (revision 512349) +++ vm/port/src/lil/em64t/pim/m2n_em64t.cpp (working copy) @@ -144,8 +144,11 @@ unsigned num_ret_need_to_save) { // we can't preserve rax 
and return value on it at the same time assert (num_ret_need_to_save == 0 || reg != &rax_opnd); -#ifdef PLATFORM_POSIX - // preserve std places + + +//#ifdef PLATFORM_POSIX + + // preserve std places unsigned i; unsigned num_std_saved = 0; // use calle-saves registers first @@ -236,10 +239,10 @@ LcgEM64TContext::GR_LOCALS_OFFSET + num_callee_saves_used + num_std_saved), size_64); } -#else //!PLATFORM_POSIX - buf = prefix(buf, prefix_fs); - buf = mov(buf, *reg, M_Opnd(0x14), size_64); -#endif //!PLATFORM_POSIX +//#else //!PLATFORM_POSIX +// buf = prefix(buf, prefix_fs); +// buf = mov(buf, *reg, M_Opnd(0x14), size_64); +//#endif //!PLATFORM_POSIX return buf; } Index: vm/vmcore/src/util/em64t/base/ini_em64t.cpp =================================================================== --- vm/vmcore/src/util/em64t/base/ini_em64t.cpp (revision 512349) +++ vm/vmcore/src/util/em64t/base/ini_em64t.cpp (working copy) @@ -46,14 +46,14 @@ typedef int64 ( * invoke_managed_func_int_t) ( // six fake parameters should be passed over GR - void *, void *, void *, void *, void *, void *, + void *, void *, void *, void *, //void *, void *, const void * const method_entry_point, int64 gr_nargs, int64 fr_nargs, int64 stack_nargs, uint64 gr_args[], double fr_args[], uint64 stack_args[]); typedef double ( * invoke_managed_func_double_t)( // six fake parameters should be passed over GR - void *, void *, void *, void *, void *, void *, + void *, void *, void *, void *, //void *, void *, const void * const method_entry_point, int64 gr_nargs, int64 fr_nargs, int64 stack_nargs, uint64 gr_args[], double fr_args[], uint64 stack_args[]); @@ -76,13 +76,13 @@ // [rbp + 48] - gr_args // [rbp + 56] - fr_args // [rbp + 64] - stack_args - const int32 METHOD_ENTRY_POINT_OFFSET = 16; - const int32 UNUSED GR_NARGS_OFFSET = 24; - const int32 FR_NARGS_OFFSET = 32; - const int32 STACK_NARGS_OFFSET = 40; - const int32 GR_ARGS_OFFSET = 48; - const int32 FR_ARGS_OFFSET = 56; - const int32 STACK_ARGS_OFFSET = 64; + 
const int32 METHOD_ENTRY_POINT_OFFSET = 16 + SHADOW; + const int32 UNUSED GR_NARGS_OFFSET = 24 + SHADOW; + const int32 FR_NARGS_OFFSET = 32 + SHADOW; + const int32 STACK_NARGS_OFFSET = 40 + SHADOW; + const int32 GR_ARGS_OFFSET = 48 + SHADOW; + const int32 FR_ARGS_OFFSET = 56 + SHADOW; + const int32 STACK_ARGS_OFFSET = 64 + SHADOW; const int STUB_SIZE = 200; char * stub = (char *) malloc_fixed_code_for_jit(STUB_SIZE, @@ -126,6 +126,11 @@ labels.define_label(MOVE_STACK_ARGS_END, stub, false); +#ifdef _WIN64 + // 1a) WIN64, reserve 4 words of shadow space + stub = alu(stub, sub_opc, rsp_opnd, Imm_Opnd(32)); +#endif + // 2) move from fr_args to registers stub = mov(stub, rcx_opnd, M_Base_Opnd(rbp_reg, FR_NARGS_OFFSET)); stub = alu(stub, or_opc, rcx_opnd, rcx_opnd); @@ -137,23 +142,33 @@ stub = movq(stub, xmm1_opnd, M_Base_Opnd(r10_reg, 1 * FR_STACK_SIZE)); stub = movq(stub, xmm2_opnd, M_Base_Opnd(r10_reg, 2 * FR_STACK_SIZE)); stub = movq(stub, xmm3_opnd, M_Base_Opnd(r10_reg, 3 * FR_STACK_SIZE)); +#ifndef _WIN64 stub = movq(stub, xmm4_opnd, M_Base_Opnd(r10_reg, 4 * FR_STACK_SIZE)); stub = movq(stub, xmm5_opnd, M_Base_Opnd(r10_reg, 5 * FR_STACK_SIZE)); stub = movq(stub, xmm6_opnd, M_Base_Opnd(r10_reg, 6 * FR_STACK_SIZE)); stub = movq(stub, xmm7_opnd, M_Base_Opnd(r10_reg, 7 * FR_STACK_SIZE)); +#endif labels.define_label(MOVE_FR_ARGS_END, stub, false); // 3) unconditionally move from gr_args to registers +#ifdef _WIN64 stub = mov(stub, r10_opnd, M_Base_Opnd(rbp_reg, GR_ARGS_OFFSET)); + stub = mov(stub, rcx_opnd, M_Base_Opnd(r10_reg, 0 * GR_STACK_SIZE)); + stub = mov(stub, rdx_opnd, M_Base_Opnd(r10_reg, 1 * GR_STACK_SIZE)); + stub = mov(stub, r8_opnd, M_Base_Opnd(r10_reg, 2 * GR_STACK_SIZE)); + stub = mov(stub, r9_opnd, M_Base_Opnd(r10_reg, 3 * GR_STACK_SIZE)); +#else + stub = mov(stub, r10_opnd, M_Base_Opnd(rbp_reg, GR_ARGS_OFFSET)); stub = mov(stub, rdi_opnd, M_Base_Opnd(r10_reg, 0 * GR_STACK_SIZE)); stub = mov(stub, rsi_opnd, M_Base_Opnd(r10_reg, 1 * GR_STACK_SIZE)); 
stub = mov(stub, rdx_opnd, M_Base_Opnd(r10_reg, 2 * GR_STACK_SIZE)); stub = mov(stub, rcx_opnd, M_Base_Opnd(r10_reg, 3 * GR_STACK_SIZE)); stub = mov(stub, r8_opnd, M_Base_Opnd(r10_reg, 4 * GR_STACK_SIZE)); stub = mov(stub, r9_opnd, M_Base_Opnd(r10_reg, 5 * GR_STACK_SIZE)); +#endif - // 4) transfer control + // 4) transfer control stub = mov(stub, r10_opnd, M_Base_Opnd(rbp_reg, METHOD_ENTRY_POINT_OFFSET)); stub = call(stub, r10_opnd); @@ -176,11 +191,8 @@ static const invoke_managed_func_int_t invoke_managed_func = (invoke_managed_func_int_t) gen_invoke_managed_func(); - // maximum number of GP registers for inputs - const int MAX_GR = 6; - // maximum number of FP registers for inputs - const int MAX_FR = 8; - // holds arguments that should be placed in GR's + + // holds arguments that should be placed in GR's uint64 gr_args[MAX_GR]; // holds arguments that should be placed in FR's double fr_args[MAX_FR]; @@ -195,7 +207,11 @@ uint64 * const stack_args = (uint64 *) STD_MALLOC(sizeof(uint64) * method->get_num_args()); int64 gr_nargs = 0; - int64 fr_nargs = 0; +#ifdef _WIN64 +# define fr_nargs gr_nargs +#else + int64 fr_nargs = 0; +#endif int64 stack_nargs = 0; int64 arg_num = 0; @@ -309,7 +325,7 @@ type = method->get_return_java_type(); switch(type) { case JAVA_TYPE_VOID: - invoke_managed_func(NULL, NULL, NULL, NULL, NULL, NULL, + invoke_managed_func(NULL, NULL, NULL, NULL, //NULL, NULL, method_entry_point, gr_nargs, fr_nargs, stack_nargs, gr_args, fr_args, stack_args); @@ -317,7 +333,7 @@ case JAVA_TYPE_ARRAY: case JAVA_TYPE_CLASS: { ObjectHandle handle = NULL; - uint64 ref = invoke_managed_func(NULL, NULL, NULL, NULL, NULL, NULL, + uint64 ref = invoke_managed_func(NULL, NULL, NULL, NULL, //NULL, NULL, method_entry_point, gr_nargs, fr_nargs, stack_nargs, gr_args, fr_args, stack_args); @@ -338,7 +354,7 @@ case JAVA_TYPE_CHAR: case JAVA_TYPE_BYTE: case JAVA_TYPE_BOOLEAN: - result->j = invoke_managed_func(NULL, NULL, NULL, NULL, NULL, NULL, + result->j = 
invoke_managed_func(NULL, NULL, NULL, NULL, //NULL, NULL, method_entry_point, gr_nargs, fr_nargs, stack_nargs, gr_args, fr_args, stack_args); @@ -346,7 +362,7 @@ case JAVA_TYPE_DOUBLE: case JAVA_TYPE_FLOAT: result->d = (invoke_managed_func_double_t(invoke_managed_func))( - NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, //NULL, NULL, method_entry_point, gr_nargs, fr_nargs, stack_nargs, gr_args, fr_args, stack_args); Index: vm/vmcore/src/util/em64t/base/compile_em64t.cpp =================================================================== --- vm/vmcore/src/util/em64t/base/compile_em64t.cpp (revision 512349) +++ vm/vmcore/src/util/em64t/base/compile_em64t.cpp (working copy) @@ -52,14 +52,14 @@ } void compile_protect_arguments(Method_Handle method, GcFrame * gc) { - const unsigned MAX_GP = 6; - const unsigned MAX_FP = 8; // adress of the top of m2n frame uint64 * const m2n_base_addr = (uint64 *)m2n_get_frame_base(m2n_get_last_frame()); // 6(scratched registers on the stack) assert(m2n_get_size() % 8 == 0); - // 15 = 1(alignment) + 8(fp) + 6(gp) registers were preserved on the stack - uint64 * const inputs_addr = m2n_base_addr - (m2n_get_size() / 8) - 15; + // 15 = 1(alignment) + n(fp) + n(gp) registers were preserved on the stack + uint64 * const inputs_addr = m2n_base_addr + - (m2n_get_size() / 8) + - 1 - MAX_GR - MAX_FR; // 1(return ip); uint64 * extra_inputs_addr = m2n_base_addr + 1; @@ -84,7 +84,7 @@ case VM_DATA_TYPE_BOOLEAN: case VM_DATA_TYPE_CHAR: case VM_DATA_TYPE_UP: - if (num_gp_used < MAX_GP) { + if (num_gp_used < MAX_GR) { ++num_gp_used; } else { ++extra_inputs_addr; @@ -93,7 +93,7 @@ case VM_DATA_TYPE_CLASS: case VM_DATA_TYPE_ARRAY: { uint64 * ref_addr; - if (num_gp_used < MAX_GP) { + if (num_gp_used < MAX_GR) { ref_addr = inputs_addr + num_gp_used; ++num_gp_used; } else { @@ -105,7 +105,7 @@ } case VM_DATA_TYPE_MP: { uint64 * ref_addr; - if (num_gp_used < MAX_GP) { + if (num_gp_used < MAX_GR) { ref_addr = inputs_addr + num_gp_used; 
++num_gp_used; } else { @@ -117,7 +117,7 @@ } case VM_DATA_TYPE_F4: case VM_DATA_TYPE_F8: - if (num_fp_used < MAX_FP) { + if (num_fp_used < MAX_FR) { ++num_fp_used; } else { ++extra_inputs_addr; @@ -136,18 +136,31 @@ // compile_me stack frame // m2n frame // 8 byte alignment -// 6 xmm registers -// 6 gp registers +// 8 xmm registers on linux and 4 on windows +// 6 gp registers on linux and 4 on windows +// 0 byte shadow on linux and 32 byte on windows // method handle -const int32 stack_size = (int32)m2n_get_size() + 8 + 120; +// Stack size should be (% 8 == 0) but shouldn't be (% 16 == 0) +const int ALIGNMENT = 8; + +const int32 gr_stack_size = (1 + MAX_GR)*GR_STACK_SIZE + + SHADOW; +const int32 stack_size = (int32)m2n_get_size() + + MAX_FR*FR_STACK_SIZE + + gr_stack_size + ALIGNMENT; + static NativeCodePtr compile_get_compile_me_generic() { static NativeCodePtr addr = NULL; if (addr) { return addr; } +#ifdef _WIN64 + const int STUB_SIZE = 400; +#else const int STUB_SIZE = 416; +#endif char * stub = (char *) malloc_fixed_code_for_jit(STUB_SIZE, DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate); addr = stub; @@ -156,32 +169,49 @@ #endif assert(stack_size % 8 == 0); assert(stack_size % 16 != 0); + // set up stack frame stub = alu(stub, sub_opc, rsp_opnd, Imm_Opnd(stack_size)); + // TODO: think over saving xmm registers conditionally - stub = movq(stub, M_Base_Opnd(rsp_reg, 112), xmm7_opnd); - stub = movq(stub, M_Base_Opnd(rsp_reg, 104), xmm6_opnd); - stub = movq(stub, M_Base_Opnd(rsp_reg, 96), xmm5_opnd); - stub = movq(stub, M_Base_Opnd(rsp_reg, 88), xmm4_opnd); - stub = movq(stub, M_Base_Opnd(rsp_reg, 80), xmm3_opnd); - stub = movq(stub, M_Base_Opnd(rsp_reg, 72), xmm2_opnd); - stub = movq(stub, M_Base_Opnd(rsp_reg, 64), xmm1_opnd); - stub = movq(stub, M_Base_Opnd(rsp_reg, 56), xmm0_opnd); +#ifndef _WIN64 + stub = movq(stub, M_Base_Opnd(rsp_reg, 7*FR_STACK_SIZE + gr_stack_size), xmm7_opnd); + stub = movq(stub, M_Base_Opnd(rsp_reg, 6*FR_STACK_SIZE + 
gr_stack_size), xmm6_opnd); + stub = movq(stub, M_Base_Opnd(rsp_reg, 5*FR_STACK_SIZE + gr_stack_size), xmm5_opnd); + stub = movq(stub, M_Base_Opnd(rsp_reg, 4*FR_STACK_SIZE + gr_stack_size), xmm4_opnd); +#endif + stub = movq(stub, M_Base_Opnd(rsp_reg, 3*FR_STACK_SIZE + gr_stack_size), xmm3_opnd); + stub = movq(stub, M_Base_Opnd(rsp_reg, 2*FR_STACK_SIZE + gr_stack_size), xmm2_opnd); + stub = movq(stub, M_Base_Opnd(rsp_reg, 1*FR_STACK_SIZE + gr_stack_size), xmm1_opnd); + stub = movq(stub, M_Base_Opnd(rsp_reg, 0*FR_STACK_SIZE + gr_stack_size), xmm0_opnd); + // we need to preserve all general purpose registers here // to protect managed objects from GC during compilation - stub = mov(stub, M_Base_Opnd(rsp_reg, 48), r9_opnd); - stub = mov(stub, M_Base_Opnd(rsp_reg, 40), r8_opnd); - stub = mov(stub, M_Base_Opnd(rsp_reg, 32), rcx_opnd); - stub = mov(stub, M_Base_Opnd(rsp_reg, 24), rdx_opnd); - stub = mov(stub, M_Base_Opnd(rsp_reg, 16), rsi_opnd); - stub = mov(stub, M_Base_Opnd(rsp_reg, 8), rdi_opnd); +#ifdef _WIN64 + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW), r9_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW), r8_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW), rdx_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW), rcx_opnd); +#else + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 5)*GR_STACK_SIZE + SHADOW), r9_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 4)*GR_STACK_SIZE + SHADOW), r8_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW), rcx_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW), rdx_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW), rsi_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW), rdi_opnd); +#endif + // push m2n to the stack - // skip m2n frame, 6 xmm registers, 6 gp registers and method handle - stub = 
m2n_gen_push_m2n(stub, NULL, FRAME_COMPILATION, false, 0, 0, stack_size); + // restore Method_Handle +#ifdef _WIN64 + stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, 0 + SHADOW)); +#else stub = mov(stub, rdi_opnd, M_Base_Opnd(rsp_reg, 0)); +#endif + // compile the method stub = call(stub, (char *)&compile_me); @@ -190,21 +220,31 @@ stub = m2n_gen_pop_m2n(stub, false, 0, bytes_to_m2n_bottom, 1); // restore gp inputs from the stack - stub = mov(stub, rdi_opnd, M_Base_Opnd(rsp_reg, 8)); - stub = mov(stub, rsi_opnd, M_Base_Opnd(rsp_reg, 16)); - stub = mov(stub, rdx_opnd, M_Base_Opnd(rsp_reg, 24)); - stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, 32)); - stub = mov(stub, r8_opnd, M_Base_Opnd(rsp_reg, 40)); - stub = mov(stub, r9_opnd, M_Base_Opnd(rsp_reg, 48)); +#ifdef _WIN64 + stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, rdx_opnd, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, r8_opnd, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, r9_opnd, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW)); +#else + stub = mov(stub, rdi_opnd, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, rsi_opnd, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, rdx_opnd, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, r8_opnd, M_Base_Opnd(rsp_reg, (1 + 4)*GR_STACK_SIZE + SHADOW)); + stub = mov(stub, r9_opnd, M_Base_Opnd(rsp_reg, (1 + 5)*GR_STACK_SIZE + SHADOW)); +#endif + // restore fp inputs from the stack - stub = movq(stub, xmm0_opnd, M_Base_Opnd(rsp_reg, 56)); - stub = movq(stub, xmm1_opnd, M_Base_Opnd(rsp_reg, 64)); - stub = movq(stub, xmm2_opnd, M_Base_Opnd(rsp_reg, 72)); - stub = movq(stub, xmm3_opnd, M_Base_Opnd(rsp_reg, 80)); - stub = movq(stub, xmm4_opnd, M_Base_Opnd(rsp_reg, 88)); - stub = movq(stub, xmm5_opnd, 
M_Base_Opnd(rsp_reg, 96)); - stub = movq(stub, xmm6_opnd, M_Base_Opnd(rsp_reg, 104)); - stub = movq(stub, xmm7_opnd, M_Base_Opnd(rsp_reg, 112)); + stub = movq(stub, xmm0_opnd, M_Base_Opnd(rsp_reg, 0*FR_STACK_SIZE + gr_stack_size)); + stub = movq(stub, xmm1_opnd, M_Base_Opnd(rsp_reg, 1*FR_STACK_SIZE + gr_stack_size)); + stub = movq(stub, xmm2_opnd, M_Base_Opnd(rsp_reg, 2*FR_STACK_SIZE + gr_stack_size)); + stub = movq(stub, xmm3_opnd, M_Base_Opnd(rsp_reg, 3*FR_STACK_SIZE + gr_stack_size)); +#ifndef _WIN64 + stub = movq(stub, xmm4_opnd, M_Base_Opnd(rsp_reg, 4*FR_STACK_SIZE + gr_stack_size)); + stub = movq(stub, xmm5_opnd, M_Base_Opnd(rsp_reg, 5*FR_STACK_SIZE + gr_stack_size)); + stub = movq(stub, xmm6_opnd, M_Base_Opnd(rsp_reg, 6*FR_STACK_SIZE + gr_stack_size)); + stub = movq(stub, xmm7_opnd, M_Base_Opnd(rsp_reg, 7*FR_STACK_SIZE + gr_stack_size)); +#endif // adjust stack pointer stub = alu(stub, add_opc, rsp_opnd, Imm_Opnd(stack_size)); @@ -245,7 +285,7 @@ #endif // preserve method handle stub = mov(stub, r10_opnd, Imm_Opnd(size_64, (int64)method)); - stub = mov(stub, M_Base_Opnd(rsp_reg, -stack_size), r10_opnd); + stub = mov(stub, M_Base_Opnd(rsp_reg, - stack_size + SHADOW), r10_opnd); // transfer control to generic part stub = jump(stub, (char *)compile_get_compile_me_generic()); assert(stub - (char *)addr <= STUB_SIZE);