Index: vm/include/open/hycomp.h
===================================================================
--- vm/include/open/hycomp.h	(revision 514652)
+++ vm/include/open/hycomp.h	(working copy)
@@ -112,7 +112,7 @@
 /**
  * Win32 - Windows 3.1 & NT using Win32
  */
-#if defined(WIN32)
+#if defined(WIN32) || defined(_WIN64)
 
 typedef __int64 I_64;
 typedef unsigned __int64 U_64;
Index: vm/port/src/encoder/ia32_em64t/encoder.h
===================================================================
--- vm/port/src/encoder/ia32_em64t/encoder.h	(revision 514652)
+++ vm/port/src/encoder/ia32_em64t/encoder.h	(working copy)
@@ -41,14 +41,38 @@
 #include "enc_base.h"
 #include "open/types.h"
 
-#ifdef _EM64T_
+#ifdef _EM64T_
 // size of general-purpose value on the stack in bytes
 #define GR_STACK_SIZE 8
 // size of floating-point value on the stack in bytes
 #define FR_STACK_SIZE 8
+
+#if defined(WIN32) || defined(_WIN64)
+    // maximum number of GP registers for inputs
+    const int MAX_GR = 4;
+    // maximum number of FP registers for inputs
+    const int MAX_FR = 4;
+    // WIN64 reserves 4 words for shadow space
+    const int SHADOW = 4 * GR_STACK_SIZE;
 #else
+    // maximum number of GP registers for inputs
+    const int MAX_GR = 6;
+    // maximum number of FP registers for inputs
+    const int MAX_FR = 8;
+    // Linux x64 doesn't reserve shadow space
+    const int SHADOW = 0;
+#endif
+
+#else
+// size of general-purpose value on the stack in bytes
 #define GR_STACK_SIZE 4
+// size of floating-point value on the stack in bytes
 #define FR_STACK_SIZE 8
+
+// maximum number of GP registers for inputs
+const int MAX_GR = 0;
+// maximum number of FP registers for inputs
+const int MAX_FR = 0;
 #endif
 
 enum Reg_No {
@@ -70,7 +94,7 @@
     xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
     fs_reg,
 #endif
-    /** @brief Total number of registers.*/
+    /** @brief Total number of registers.*/
     n_reg
 };
 //
Index: vm/port/src/lil/em64t/pim/include/lil_code_generator_em64t.h
===================================================================
--- vm/port/src/lil/em64t/pim/include/lil_code_generator_em64t.h	(revision 514652)
+++ vm/port/src/lil/em64t/pim/include/lil_code_generator_em64t.h	(working copy)
@@ -85,20 +85,43 @@
 class LcgEM64TContext: public LilInstructionVisitor {
 
 public:
 
+#ifdef _WIN64
+    // maximum number of GR reserved for returns
+    static const unsigned MAX_GR_RETURNS = 1;
+    // maximum number of GR reserved for outputs/inputs
+    static const unsigned MAX_GR_OUTPUTS = 4;
     // maximum number of locals that can be placed in GR
-    static const unsigned MAX_GR_LOCALS = 6;
+    static const unsigned MAX_GR_LOCALS = 8;
     // maximum number of stand places
-    static const unsigned MAX_STD_PLACES = 2;
-    // maximum number of GR reserved for outputs/inputs
-    static const unsigned MAX_GR_OUTPUTS = 6;
+    static const unsigned MAX_STD_PLACES = 2;
+
+    // maximum number of FR reserved for returns
+    static const unsigned MAX_FR_RETURNS = 1;
     // maximum number of FR reserved for outputs/inputs
-    static const unsigned MAX_FR_OUTPUTS = 8;
+    static const unsigned MAX_FR_OUTPUTS = 4;
+    // maximum number of local XMM registers
+    static const unsigned MAX_FR_LOCALS = 10;
+    // maximum number of temporary XMM registers
+    static const unsigned MAX_FR_TEMPORARY = 2;
+#else
     // maximum number of GR reserved for returns
     static const unsigned MAX_GR_RETURNS = 2;
+    // maximum number of GR reserved for outputs/inputs
+    static const unsigned MAX_GR_OUTPUTS = 6;
+    // maximum number of locals that can be placed in GR
+    static const unsigned MAX_GR_LOCALS = 6;
+    // maximum number of stand places
+    static const unsigned MAX_STD_PLACES = 2;
+    // maximum number of FR reserved for returns
     static const unsigned MAX_FR_RETURNS = 2;
+    // maximum number of FR reserved for outputs/inputs
+    static const unsigned MAX_FR_OUTPUTS = 8;
     // maximum number of temporary XMM registers
-    static const unsigned MAX_FR_TEMPORARY = 8;
+    static const unsigned MAX_FR_LOCALS = 8;
+    // maximum number of temporary XMM registers
+    static const unsigned MAX_FR_TEMPORARY = 0;
+#endif
 
     // size of GR in bytes
     // TODO: Think about using GR_STACK_SIZE
@@ -118,6 +141,7 @@
     static const unsigned FR_OUTPUTS_OFFSET = 0;
     static const unsigned FR_RETURNS_OFFSET = FR_OUTPUTS_OFFSET + MAX_FR_OUTPUTS;
     static const unsigned FR_TEMPORARY_OFFSET = FR_RETURNS_OFFSET + MAX_FR_RETURNS;
+    static const unsigned FR_LOCALS_OFFSET = FR_TEMPORARY_OFFSET + MAX_FR_TEMPORARY;
 
 private:
 
@@ -160,6 +184,7 @@
 
     LcgEM64TContext(LilCodeStub * stub, tl::MemoryPool & m);
 
+#ifdef _WIN64
    /**
     * returns general purpose register associated with given index
     * this association is used across whole lil code generator
    */
@@ -169,6 +194,74 @@
             // std places (scratched)
             &r10_opnd, &r11_opnd,
             // GR locals (calee-saved)
+            &r12_opnd, &r13_opnd, &r14_opnd, &r15_opnd,
+            &rdi_opnd, &rsi_opnd, &rbp_opnd, &rbx_opnd,
+            // gr inputs/outputs (scratched)
+            &rcx_opnd, &rdx_opnd, &r8_opnd, &r9_opnd,
+            // gr returns (scratched)
+            &rax_opnd,
+            // rsp
+            &rsp_opnd
+        };
+        return *REG_MAP[index];
+    }
+
+    /**
+     * returns xmm register associated with given index
+     * this association is used across whole lil code generator
+     */
+    static const XMM_Opnd & get_xmm_reg_from_map(unsigned index) {
+        static const XMM_Opnd * XMM_REG_MAP[] = {
+            // fr inputs/outputs (scratched)
+            &xmm0_opnd, &xmm1_opnd, &xmm2_opnd, &xmm3_opnd,
+            // fr returns (scratched)
+            &xmm0_opnd,
+            // temporary xmm registers (scratched)
+            &xmm4_opnd, &xmm5_opnd,
+            // locals xmm registers
+            &xmm6_opnd, &xmm7_opnd, &xmm8_opnd, &xmm9_opnd,
+            &xmm10_opnd, &xmm11_opnd, &xmm12_opnd, &xmm13_opnd,
+            &xmm14_opnd, &xmm15_opnd
+        };
+        return *XMM_REG_MAP[index];
+    }
+
+    /**
+     * an association between register number and index in the REG_MAP array
+     */
+    static unsigned get_index_in_map(const Reg_No reg) {
+        static const unsigned INDEX_MAP[] = {
+            // rax_reg,              rbx_reg,               rcx_reg,
+            GR_RETURNS_OFFSET,       GR_LOCALS_OFFSET + 7,  GR_OUTPUTS_OFFSET + 0,
+            // rdx_reg,              rdi_reg,               rsi_reg,
+            GR_OUTPUTS_OFFSET + 1,   GR_LOCALS_OFFSET + 4,  GR_LOCALS_OFFSET + 5,
+            // rsp_reg,              rbp_reg,               r8_reg,
+            RSP_OFFSET,              GR_LOCALS_OFFSET + 6,  GR_OUTPUTS_OFFSET + 2,
+            // r9_reg,               r10_reg,               r11_reg,
+            GR_OUTPUTS_OFFSET + 3,   STD_PLACES_OFFSET,     STD_PLACES_OFFSET + 1,
+            // r12_reg,              r13_reg,               r14_reg,
+            GR_LOCALS_OFFSET,        GR_LOCALS_OFFSET + 1,  GR_LOCALS_OFFSET + 2,
+            // r15_reg,              xmm0_reg,              xmm1_reg,
+            GR_LOCALS_OFFSET + 3,    FR_OUTPUTS_OFFSET,     FR_OUTPUTS_OFFSET + 1,
+            // xmm2_reg,             xmm3_reg,              xmm4_reg,
+            FR_OUTPUTS_OFFSET + 2,   FR_OUTPUTS_OFFSET + 3, FR_TEMPORARY_OFFSET,
+            // xmm5_reg,             xmm6_reg,              xmm7_reg,
+            FR_TEMPORARY_OFFSET + 1, FR_LOCALS_OFFSET,      FR_LOCALS_OFFSET + 1,
+            // xmm8_reg,             xmm9_reg,              xmm10_reg,
+            FR_LOCALS_OFFSET + 2,    FR_LOCALS_OFFSET + 3,  FR_LOCALS_OFFSET + 4,
+            // xmm11_reg,            xmm12_reg,             xmm13_reg,
+            FR_LOCALS_OFFSET + 5,    FR_LOCALS_OFFSET + 6,  FR_LOCALS_OFFSET + 7,
+            // xmm14_reg,            xmm15_reg
+            FR_LOCALS_OFFSET + 8,    FR_LOCALS_OFFSET + 9
+        };
+        return INDEX_MAP[reg];
+    }
+#else
+    static const R_Opnd & get_reg_from_map(unsigned index) {
+        static const R_Opnd * REG_MAP[] = {
+            // std places (scratched)
+            &r10_opnd, &r11_opnd,
+            // GR locals (calee-saved)
             &r12_opnd, &r13_opnd, &r14_opnd, &r15_opnd, &rbp_opnd, &rbx_opnd,
             // gr inputs/outputs (scratched)
             &rdi_opnd, &rsi_opnd, &rdx_opnd, &rcx_opnd, &r8_opnd, &r9_opnd,
@@ -228,7 +321,7 @@
         };
         return INDEX_MAP[reg];
     }
-
+#endif
     void * operator new(size_t sz, tl::MemoryPool & m) {
         return m.alloc(sz);
     }
@@ -517,7 +610,11 @@
 
     // reserve enough GR & FR outputs
     unsigned gp_out_cnt = 0;
+#ifdef _WIN64
+#   define fp_out_cnt gp_out_cnt
+#else
     unsigned fp_out_cnt = 0;
+#endif
     for (unsigned i = 0; i < lil_sig_get_num_args(sig); i++) {
         LilType t = lil_sig_get_arg_type(sig, i);
         if (is_fp_type(t)) {
Index: vm/port/src/lil/em64t/pim/lil_code_generator_em64t.cpp
===================================================================
--- vm/port/src/lil/em64t/pim/lil_code_generator_em64t.cpp	(revision 514652)
+++ vm/port/src/lil/em64t/pim/lil_code_generator_em64t.cpp	(working copy)
@@ -239,7 +239,8 @@
         // next temporary register is allocated from unused scratched
         // registers in the following order:
         // xmm8, xmm9, ... xmm15
-        ASSERT(get_num_used_reg() < LcgEM64TContext::MAX_FR_TEMPORARY,
+        ASSERT(get_num_used_reg() < LcgEM64TContext::MAX_FR_TEMPORARY + LcgEM64TContext::MAX_FR_LOCALS,
+        //ASSERT(get_num_used_reg() < LcgEM64TContext::MAX_FR_TEMPORARY ,
             "LIL INTERNAL ERROR: Not enough temporary registers");
         m_idx = LcgEM64TContext::get_xmm_reg_from_map(
             LcgEM64TContext::FR_TEMPORARY_OFFSET + get_num_used_reg()).get_idx();
@@ -1587,6 +1588,9 @@
         }
         case LCK_Call:
         case LCK_CallNoRet: {
+#ifdef _WIN64
+            buf = alu(buf, add_opc, rsp_opnd, Imm_Opnd(-SHADOW), size_64);
+#endif
             if (lil_operand_is_immed(target)) {
                 // check if we can perform relative call
                 int64 target_value = lil_operand_get_immed(target);
@@ -1618,6 +1622,9 @@
                 const LcgEM64TLoc * loc = get_op_loc(target, false);
                 buf = ::call(buf, get_rm_opnd(loc), size_64);
             }
+#ifdef _WIN64
+            buf = alu(buf, add_opc, rsp_opnd, Imm_Opnd(SHADOW), size_64);
+#endif
             take_inputs_from_stack = true;
             break;
         }
Index: vm/port/src/lil/em64t/pim/m2n_em64t.cpp
===================================================================
--- vm/port/src/lil/em64t/pim/m2n_em64t.cpp	(revision 514652)
+++ vm/port/src/lil/em64t/pim/m2n_em64t.cpp	(working copy)
@@ -144,7 +144,10 @@
                               unsigned num_ret_need_to_save) {
     // we can't preserve rax and return value on it at the same time
     assert (num_ret_need_to_save == 0 || reg != &rax_opnd);
-#ifdef PLATFORM_POSIX
+
+
+//#ifdef PLATFORM_POSIX
+
     // preserve std places
     unsigned i;
     unsigned num_std_saved = 0;
@@ -193,7 +196,13 @@
     // TODO: FIXME: only absolute addressing mode is supported now
     buf = mov(buf, rax_opnd, Imm_Opnd(size_64, (uint64)get_thread_ptr), size_64);
+#ifdef _WIN64
+    buf = alu(buf, add_opc, rsp_opnd, Imm_Opnd(-SHADOW));
+#endif
     buf = call(buf, rax_opnd, size_64);
+#ifdef _WIN64
+    buf = alu(buf, add_opc, rsp_opnd, Imm_Opnd(SHADOW));
+#endif
     if (reg != &rax_opnd) {
         buf = mov(buf, *reg, rax_opnd, size_64);
     }
 
@@ -236,10 +245,10 @@
             LcgEM64TContext::GR_LOCALS_OFFSET + num_callee_saves_used + num_std_saved),
             size_64);
     }
-#else //!PLATFORM_POSIX
-    buf = prefix(buf, prefix_fs);
-    buf = mov(buf, *reg, M_Opnd(0x14), size_64);
-#endif //!PLATFORM_POSIX
+//#else //!PLATFORM_POSIX
+//    buf = prefix(buf, prefix_fs);
+//    buf = mov(buf, *reg, M_Opnd(0x14), size_64);
+//#endif //!PLATFORM_POSIX
     return buf;
 }
 
@@ -358,7 +367,13 @@
     // NOTE: the following should be true before the call ($rsp % 8 == 0 && $rsp % 16 != 0)!
     // Call m2n_pop_local_handles or m2n_free_local_handles
+#ifdef _WIN64
+    buf = alu(buf, add_opc, rsp_opnd, Imm_Opnd(-SHADOW));
+#endif
     buf = call(buf, rax_opnd, size_64);
+#ifdef _WIN64
+    buf = alu(buf, add_opc, rsp_opnd, Imm_Opnd(SHADOW));
+#endif
 
     if (num_preserve_ret > 0) {
         // Restore return value
Index: vm/port/src/lil/em64t/pim/m2n_em64t_internal.h
===================================================================
--- vm/port/src/lil/em64t/pim/m2n_em64t_internal.h	(revision 514652)
+++ vm/port/src/lil/em64t/pim/m2n_em64t_internal.h	(working copy)
@@ -57,6 +57,10 @@
     frame_type current_frame_type;
     uint64 rbx;
     uint64 rbp;
+#ifdef _WIN64
+    uint64 rsi;
+    uint64 rdi;
+#endif
     uint64 r15;
     uint64 r14;
     uint64 r13;
Index: vm/vmcore/src/util/em64t/base/compile_em64t.cpp
===================================================================
--- vm/vmcore/src/util/em64t/base/compile_em64t.cpp	(revision 514652)
+++ vm/vmcore/src/util/em64t/base/compile_em64t.cpp	(working copy)
@@ -52,16 +52,23 @@
 }
 
 void compile_protect_arguments(Method_Handle method, GcFrame * gc) {
-    const unsigned MAX_GP = 6;
-    const unsigned MAX_FP = 8;
     // adress of the top of m2n frame
     uint64 * const m2n_base_addr = (uint64 *)m2n_get_frame_base(m2n_get_last_frame());
     // 6(scratched registers on the stack)
     assert(m2n_get_size() % 8 == 0);
-    // 15 = 1(alignment) + 8(fp) + 6(gp) registers were preserved on the stack
-    uint64 * const inputs_addr = m2n_base_addr - (m2n_get_size() / 8) - 15;
+    // 1(alignment) + n(fp) + n(gp) registers were preserved on the stack
+    uint64 * const inputs_addr = m2n_base_addr
+        - (m2n_get_size() / 8)
+        - 1 - MAX_GR - MAX_FR;
     // 1(return ip);
+#ifdef _WIN64
+    // WIN64, reserve 4 words of shadow space
+    //uint64 * extra_inputs_addr = m2n_base_addr + SHADOW/8 + 1;
+    // but jit doesn't support it now
     uint64 * extra_inputs_addr = m2n_base_addr + 1;
+#else
+    uint64 * extra_inputs_addr = m2n_base_addr + 1;
+#endif
 
     assert(!hythread_is_suspend_enabled());
     Method_Signature_Handle msh = method_get_signature(method);
@@ -84,7 +91,7 @@
     case VM_DATA_TYPE_BOOLEAN:
     case VM_DATA_TYPE_CHAR:
     case VM_DATA_TYPE_UP:
-        if (num_gp_used < MAX_GP) {
+        if (num_gp_used < MAX_GR) {
             ++num_gp_used;
         } else {
             ++extra_inputs_addr;
@@ -93,7 +100,7 @@
     case VM_DATA_TYPE_CLASS:
     case VM_DATA_TYPE_ARRAY: {
         uint64 * ref_addr;
-        if (num_gp_used < MAX_GP) {
+        if (num_gp_used < MAX_GR) {
             ref_addr = inputs_addr + num_gp_used;
             ++num_gp_used;
         } else {
@@ -105,7 +112,7 @@
     case VM_DATA_TYPE_MP: {
         uint64 * ref_addr;
-        if (num_gp_used < MAX_GP) {
+        if (num_gp_used < MAX_GR) {
             ref_addr = inputs_addr + num_gp_used;
             ++num_gp_used;
         } else {
@@ -117,7 +124,7 @@
     case VM_DATA_TYPE_F4:
     case VM_DATA_TYPE_F8:
-        if (num_fp_used < MAX_FP) {
+        if (num_fp_used < MAX_FR) {
             ++num_fp_used;
         } else {
             ++extra_inputs_addr;
@@ -136,18 +143,31 @@
 // compile_me stack frame
 // m2n frame
 // 8 byte alignment
-// 6 xmm registers
-// 6 gp registers
+// 8 xmm registers on linux and 4 on windows
+// 6 gp registers on linux and 4 on windows
+// 0 byte shadow on linux and 32 byte on windows
 // method handle
-const int32 stack_size = (int32)m2n_get_size() + 8 + 120;
+// Stack size should be (% 8 == 0) but shouldn't be (% 16 == 0)
+const int ALIGNMENT = 8;
+
+const int32 gr_stack_size = (1 + MAX_GR)*GR_STACK_SIZE +
+                            SHADOW;
+const int32 stack_size = (int32)m2n_get_size() +
+                         MAX_FR*FR_STACK_SIZE +
+                         gr_stack_size + ALIGNMENT;
+
 static NativeCodePtr compile_get_compile_me_generic() {
     static NativeCodePtr addr = NULL;
     if (addr) {
         return addr;
     }
 
+#ifdef _WIN64
+    const int STUB_SIZE = 400;
+#else
     const int STUB_SIZE = 416;
+#endif
     char * stub = (char *) malloc_fixed_code_for_jit(STUB_SIZE,
         DEFAULT_CODE_ALIGNMENT, CODE_BLOCK_HEAT_DEFAULT, CAA_Allocate);
     addr = stub;
@@ -156,32 +176,49 @@
 #endif
     assert(stack_size % 8 == 0);
     assert(stack_size % 16 != 0);
+
     // set up stack frame
     stub = alu(stub, sub_opc, rsp_opnd, Imm_Opnd(stack_size));
+
     // TODO: think over saving xmm registers conditionally
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 112), xmm7_opnd);
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 104), xmm6_opnd);
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 96), xmm5_opnd);
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 88), xmm4_opnd);
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 80), xmm3_opnd);
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 72), xmm2_opnd);
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 64), xmm1_opnd);
-    stub = movq(stub, M_Base_Opnd(rsp_reg, 56), xmm0_opnd);
+#ifndef _WIN64
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 7*FR_STACK_SIZE + gr_stack_size), xmm7_opnd);
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 6*FR_STACK_SIZE + gr_stack_size), xmm6_opnd);
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 5*FR_STACK_SIZE + gr_stack_size), xmm5_opnd);
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 4*FR_STACK_SIZE + gr_stack_size), xmm4_opnd);
+#endif
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 3*FR_STACK_SIZE + gr_stack_size), xmm3_opnd);
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 2*FR_STACK_SIZE + gr_stack_size), xmm2_opnd);
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 1*FR_STACK_SIZE + gr_stack_size), xmm1_opnd);
+    stub = movq(stub, M_Base_Opnd(rsp_reg, 0*FR_STACK_SIZE + gr_stack_size), xmm0_opnd);
+
     // we need to preserve all general purpose registers here
     // to protect managed objects from GC during compilation
-    stub = mov(stub, M_Base_Opnd(rsp_reg, 48), r9_opnd);
-    stub = mov(stub, M_Base_Opnd(rsp_reg, 40), r8_opnd);
-    stub = mov(stub, M_Base_Opnd(rsp_reg, 32), rcx_opnd);
-    stub = mov(stub, M_Base_Opnd(rsp_reg, 24), rdx_opnd);
-    stub = mov(stub, M_Base_Opnd(rsp_reg, 16), rsi_opnd);
-    stub = mov(stub, M_Base_Opnd(rsp_reg, 8), rdi_opnd);
+#ifdef _WIN64
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW), r9_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW), r8_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW), rdx_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW), rcx_opnd);
+#else
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 5)*GR_STACK_SIZE + SHADOW), r9_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 4)*GR_STACK_SIZE + SHADOW), r8_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW), rcx_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW), rdx_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW), rsi_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW), rdi_opnd);
+#endif
+
     // push m2n to the stack
-    // skip m2n frame, 6 xmm registers, 6 gp registers and method handle
-    stub = m2n_gen_push_m2n(stub, NULL, FRAME_COMPILATION, false, 0, 0, stack_size);
+    // restore Method_Handle
+#ifdef _WIN64
+    stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, 0 + SHADOW));
+#else
     stub = mov(stub, rdi_opnd, M_Base_Opnd(rsp_reg, 0));
+#endif
+
     // compile the method
     stub = call(stub, (char *)&compile_me);
 
@@ -190,21 +227,31 @@
     stub = m2n_gen_pop_m2n(stub, false, 0, bytes_to_m2n_bottom, 1);
 
     // restore gp inputs from the stack
-    stub = mov(stub, rdi_opnd, M_Base_Opnd(rsp_reg, 8));
-    stub = mov(stub, rsi_opnd, M_Base_Opnd(rsp_reg, 16));
-    stub = mov(stub, rdx_opnd, M_Base_Opnd(rsp_reg, 24));
-    stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, 32));
-    stub = mov(stub, r8_opnd, M_Base_Opnd(rsp_reg, 40));
-    stub = mov(stub, r9_opnd, M_Base_Opnd(rsp_reg, 48));
+#ifdef _WIN64
+    stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, rdx_opnd, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, r8_opnd, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, r9_opnd, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW));
+#else
+    stub = mov(stub, rdi_opnd, M_Base_Opnd(rsp_reg, (1 + 0)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, rsi_opnd, M_Base_Opnd(rsp_reg, (1 + 1)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, rdx_opnd, M_Base_Opnd(rsp_reg, (1 + 2)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, rcx_opnd, M_Base_Opnd(rsp_reg, (1 + 3)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, r8_opnd, M_Base_Opnd(rsp_reg, (1 + 4)*GR_STACK_SIZE + SHADOW));
+    stub = mov(stub, r9_opnd, M_Base_Opnd(rsp_reg, (1 + 5)*GR_STACK_SIZE + SHADOW));
+#endif
+
     // restore fp inputs from the stack
-    stub = movq(stub, xmm0_opnd, M_Base_Opnd(rsp_reg, 56));
-    stub = movq(stub, xmm1_opnd, M_Base_Opnd(rsp_reg, 64));
-    stub = movq(stub, xmm2_opnd, M_Base_Opnd(rsp_reg, 72));
-    stub = movq(stub, xmm3_opnd, M_Base_Opnd(rsp_reg, 80));
-    stub = movq(stub, xmm4_opnd, M_Base_Opnd(rsp_reg, 88));
-    stub = movq(stub, xmm5_opnd, M_Base_Opnd(rsp_reg, 96));
-    stub = movq(stub, xmm6_opnd, M_Base_Opnd(rsp_reg, 104));
-    stub = movq(stub, xmm7_opnd, M_Base_Opnd(rsp_reg, 112));
+    stub = movq(stub, xmm0_opnd, M_Base_Opnd(rsp_reg, 0*FR_STACK_SIZE + gr_stack_size));
+    stub = movq(stub, xmm1_opnd, M_Base_Opnd(rsp_reg, 1*FR_STACK_SIZE + gr_stack_size));
+    stub = movq(stub, xmm2_opnd, M_Base_Opnd(rsp_reg, 2*FR_STACK_SIZE + gr_stack_size));
+    stub = movq(stub, xmm3_opnd, M_Base_Opnd(rsp_reg, 3*FR_STACK_SIZE + gr_stack_size));
+#ifndef _WIN64
+    stub = movq(stub, xmm4_opnd, M_Base_Opnd(rsp_reg, 4*FR_STACK_SIZE + gr_stack_size));
+    stub = movq(stub, xmm5_opnd, M_Base_Opnd(rsp_reg, 5*FR_STACK_SIZE + gr_stack_size));
+    stub = movq(stub, xmm6_opnd, M_Base_Opnd(rsp_reg, 6*FR_STACK_SIZE + gr_stack_size));
+    stub = movq(stub, xmm7_opnd, M_Base_Opnd(rsp_reg, 7*FR_STACK_SIZE + gr_stack_size));
+#endif
 
     // adjust stack pointer
     stub = alu(stub, add_opc, rsp_opnd, Imm_Opnd(stack_size));
@@ -245,7 +292,7 @@
 #endif
     // preserve method handle
     stub = mov(stub, r10_opnd, Imm_Opnd(size_64, (int64)method));
-    stub = mov(stub, M_Base_Opnd(rsp_reg, -stack_size), r10_opnd);
+    stub = mov(stub, M_Base_Opnd(rsp_reg, - stack_size + SHADOW), r10_opnd);
     // transfer control to generic part
     stub = jump(stub, (char *)compile_get_compile_me_generic());
     assert(stub - (char *)addr <= STUB_SIZE);
Index: vm/vmcore/src/util/em64t/base/ini_em64t.cpp
===================================================================
--- vm/vmcore/src/util/em64t/base/ini_em64t.cpp	(revision 514652)
+++ vm/vmcore/src/util/em64t/base/ini_em64t.cpp	(working copy)
@@ -44,16 +44,29 @@
 
 #include "dump.h"
 
+#if defined(WIN32) || defined(_WIN64)
+
+// four fake parameters should be passed over GR
+#define FAKE_PARAMETERS void *, void *, void *, void *
+#define FAKE_ARGUMENTS NULL, NULL, NULL, NULL
+
+#else // defined(WIN32) || defined(_WIN64)
+
+// six fake parameters should be passed over GR
+#define FAKE_PARAMETERS void *, void *, void *, void *, void *, void *
+#define FAKE_ARGUMENTS NULL, NULL, NULL, NULL, NULL, NULL
+
+#endif // defined(WIN32) || defined(_WIN64)
+
+
 typedef int64 ( * invoke_managed_func_int_t) (
-    // six fake parameters should be passed over GR
-    void *, void *, void *, void *, void *, void *,
+    FAKE_PARAMETERS,
     const void * const method_entry_point,
     int64 gr_nargs, int64 fr_nargs, int64 stack_nargs,
     uint64 gr_args[], double fr_args[], uint64 stack_args[]);
 
 typedef double ( * invoke_managed_func_double_t)(
-    // six fake parameters should be passed over GR
-    void *, void *, void *, void *, void *, void *,
+    FAKE_PARAMETERS,
     const void * const method_entry_point,
     int64 gr_nargs, int64 fr_nargs, int64 stack_nargs,
     uint64 gr_args[], double fr_args[], uint64 stack_args[]);
@@ -76,13 +89,13 @@
     // [rbp + 48] - gr_args
     // [rbp + 56] - fr_args
     // [rbp + 64] - stack_args
-    const int32 METHOD_ENTRY_POINT_OFFSET = 16;
-    const int32 UNUSED GR_NARGS_OFFSET = 24;
-    const int32 FR_NARGS_OFFSET = 32;
-    const int32 STACK_NARGS_OFFSET = 40;
-    const int32 GR_ARGS_OFFSET = 48;
-    const int32 FR_ARGS_OFFSET = 56;
-    const int32 STACK_ARGS_OFFSET = 64;
+    const int32 METHOD_ENTRY_POINT_OFFSET = 16 + SHADOW;
+    const int32 UNUSED GR_NARGS_OFFSET = 24 + SHADOW;
+    const int32 FR_NARGS_OFFSET = 32 + SHADOW;
+    const int32 STACK_NARGS_OFFSET = 40 + SHADOW;
+    const int32 GR_ARGS_OFFSET = 48 + SHADOW;
+    const int32 FR_ARGS_OFFSET = 56 + SHADOW;
+    const int32 STACK_ARGS_OFFSET = 64 + SHADOW;
 
     const int STUB_SIZE = 200;
     char * stub = (char *) malloc_fixed_code_for_jit(STUB_SIZE,
@@ -126,6 +139,12 @@
     labels.define_label(MOVE_STACK_ARGS_END, stub, false);
 
+#ifdef _WIN64
+    // 1a) WIN64, reserve 4 words of shadow space
+    //stub = alu(stub, sub_opc, rsp_opnd, Imm_Opnd(SHADOW));
+    // but jit doesn't support it now
+#endif
+
     // 2) move from fr_args to registers
     stub = mov(stub, rcx_opnd, M_Base_Opnd(rbp_reg, FR_NARGS_OFFSET));
     stub = alu(stub, or_opc, rcx_opnd, rcx_opnd);
@@ -137,21 +156,31 @@
     stub = movq(stub, xmm1_opnd, M_Base_Opnd(r10_reg, 1 * FR_STACK_SIZE));
     stub = movq(stub, xmm2_opnd, M_Base_Opnd(r10_reg, 2 * FR_STACK_SIZE));
     stub = movq(stub, xmm3_opnd, M_Base_Opnd(r10_reg, 3 * FR_STACK_SIZE));
+#ifndef _WIN64
     stub = movq(stub, xmm4_opnd, M_Base_Opnd(r10_reg, 4 * FR_STACK_SIZE));
     stub = movq(stub, xmm5_opnd, M_Base_Opnd(r10_reg, 5 * FR_STACK_SIZE));
     stub = movq(stub, xmm6_opnd, M_Base_Opnd(r10_reg, 6 * FR_STACK_SIZE));
     stub = movq(stub, xmm7_opnd, M_Base_Opnd(r10_reg, 7 * FR_STACK_SIZE));
+#endif
 
     labels.define_label(MOVE_FR_ARGS_END, stub, false);
 
     // 3) unconditionally move from gr_args to registers
+#ifdef _WIN64
     stub = mov(stub, r10_opnd, M_Base_Opnd(rbp_reg, GR_ARGS_OFFSET));
+    stub = mov(stub, rcx_opnd, M_Base_Opnd(r10_reg, 0 * GR_STACK_SIZE));
+    stub = mov(stub, rdx_opnd, M_Base_Opnd(r10_reg, 1 * GR_STACK_SIZE));
+    stub = mov(stub, r8_opnd, M_Base_Opnd(r10_reg, 2 * GR_STACK_SIZE));
+    stub = mov(stub, r9_opnd, M_Base_Opnd(r10_reg, 3 * GR_STACK_SIZE));
+#else
+    stub = mov(stub, r10_opnd, M_Base_Opnd(rbp_reg, GR_ARGS_OFFSET));
     stub = mov(stub, rdi_opnd, M_Base_Opnd(r10_reg, 0 * GR_STACK_SIZE));
     stub = mov(stub, rsi_opnd, M_Base_Opnd(r10_reg, 1 * GR_STACK_SIZE));
     stub = mov(stub, rdx_opnd, M_Base_Opnd(r10_reg, 2 * GR_STACK_SIZE));
     stub = mov(stub, rcx_opnd, M_Base_Opnd(r10_reg, 3 * GR_STACK_SIZE));
     stub = mov(stub, r8_opnd, M_Base_Opnd(r10_reg, 4 * GR_STACK_SIZE));
     stub = mov(stub, r9_opnd, M_Base_Opnd(r10_reg, 5 * GR_STACK_SIZE));
+#endif
 
     // 4) transfer control
     stub = mov(stub, r10_opnd, M_Base_Opnd(rbp_reg, METHOD_ENTRY_POINT_OFFSET));
@@ -176,10 +205,7 @@
     static const invoke_managed_func_int_t invoke_managed_func =
         (invoke_managed_func_int_t) gen_invoke_managed_func();
-    // maximum number of GP registers for inputs
-    const int MAX_GR = 6;
-    // maximum number of FP registers for inputs
-    const int MAX_FR = 8;
+
     // holds arguments that should be placed in GR's
     uint64 gr_args[MAX_GR];
     // holds arguments that should be placed in FR's
@@ -195,7 +221,11 @@
     uint64 * const stack_args = (uint64 *) STD_MALLOC(sizeof(uint64) * method->get_num_args());
 
     int64 gr_nargs = 0;
-    int64 fr_nargs = 0;
+#ifdef _WIN64
+#   define fr_nargs gr_nargs
+#else
    int64 fr_nargs = 0;
+#endif
     int64 stack_nargs = 0;
     int64 arg_num = 0;
 
@@ -309,7 +339,7 @@
     type = method->get_return_java_type();
     switch(type) {
     case JAVA_TYPE_VOID:
-        invoke_managed_func(NULL, NULL, NULL, NULL, NULL, NULL,
+        invoke_managed_func(FAKE_ARGUMENTS,
             method_entry_point, gr_nargs, fr_nargs, stack_nargs,
             gr_args, fr_args, stack_args);
@@ -317,7 +347,7 @@
     case JAVA_TYPE_ARRAY:
     case JAVA_TYPE_CLASS: {
         ObjectHandle handle = NULL;
-        uint64 ref = invoke_managed_func(NULL, NULL, NULL, NULL, NULL, NULL,
+        uint64 ref = invoke_managed_func(FAKE_ARGUMENTS,
             method_entry_point, gr_nargs, fr_nargs, stack_nargs,
             gr_args, fr_args, stack_args);
@@ -338,7 +368,7 @@
     case JAVA_TYPE_CHAR:
     case JAVA_TYPE_BYTE:
     case JAVA_TYPE_BOOLEAN:
-        result->j = invoke_managed_func(NULL, NULL, NULL, NULL, NULL, NULL,
+        result->j = invoke_managed_func(FAKE_ARGUMENTS,
             method_entry_point, gr_nargs, fr_nargs, stack_nargs,
             gr_args, fr_args, stack_args);
@@ -346,7 +376,7 @@
     case JAVA_TYPE_DOUBLE:
     case JAVA_TYPE_FLOAT:
         result->d = (invoke_managed_func_double_t(invoke_managed_func))(
-            NULL, NULL, NULL, NULL, NULL, NULL,
+            FAKE_ARGUMENTS,
            method_entry_point, gr_nargs, fr_nargs, stack_nargs,
            gr_args, fr_args, stack_args);
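
Note (illustrative, not part of the patch): the MAX_GR/MAX_FR/SHADOW constants introduced in encoder.h encode the two x86-64 calling conventions this port bridges. Win64 passes the first four arguments in rcx/rdx/r8/r9 (or xmm0-xmm3), with GP and FP arguments sharing the same four positions and the caller reserving a 32-byte shadow area; the System V (Linux) ABI passes six integer arguments in rdi/rsi/rdx/rcx/r8/r9 and eight FP arguments in xmm0-xmm7 with no shadow area. The standalone C++ sketch below uses a hypothetical classify() helper (not from the Harmony sources) to show how these constants model argument placement, and why the patch aliases fp_out_cnt and fr_nargs to the GP counters on Win64.

// Standalone sketch; mirrors the MAX_GR/MAX_FR/SHADOW values from encoder.h
// but uses its own MY_-prefixed names to avoid any clash with the patch.
#include <cstdio>

#ifdef _WIN64
static const int MY_MAX_GR = 4, MY_MAX_FR = 4, MY_SHADOW = 4 * 8;  // rcx, rdx, r8, r9; xmm0-xmm3; 32-byte shadow
#else
static const int MY_MAX_GR = 6, MY_MAX_FR = 8, MY_SHADOW = 0;      // rdi, rsi, rdx, rcx, r8, r9; xmm0-xmm7
#endif

// Decide where each argument of a mixed GP/FP signature goes.
// is_fp[i] is true when argument i is float/double.
static void classify(const bool * is_fp, int nargs) {
    int gr_used = 0, fr_used = 0, stack_slots = 0;
    for (int i = 0; i < nargs; i++) {
#ifdef _WIN64
        // Win64: GP and FP arguments consume the same four positions,
        // which is why the patch aliases fp_out_cnt/fr_nargs to the GP counter.
        int & used = gr_used;
        (void)fr_used;
#else
        int & used = is_fp[i] ? fr_used : gr_used;
#endif
        if (used < (is_fp[i] ? MY_MAX_FR : MY_MAX_GR)) {
            printf("arg %d -> %s register %d\n", i, is_fp[i] ? "xmm" : "gp", used);
            ++used;
        } else {
            // Overflow arguments go to the stack; on Win64 they sit above
            // the 32-byte shadow area the caller allocates for the callee.
            printf("arg %d -> stack, offset %d in the outgoing area\n",
                   i, MY_SHADOW + 8 * stack_slots++);
        }
    }
}

int main() {
    // long, double, long, long, double, long, long, long
    const bool is_fp[] = { false, true, false, false, true, false, false, false };
    classify(is_fp, 8);
    return 0;
}

The same shadow-space rule is what the paired alu(buf, add_opc, rsp_opnd, Imm_Opnd(-SHADOW)) / Imm_Opnd(SHADOW) adjustments around calls in lil_code_generator_em64t.cpp and m2n_em64t.cpp implement on the Win64 side.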