From a6f127c19af629a1ae2732dea657b52efe02f41e Mon Sep 17 00:00:00 2001 From: Pavel Afremov Date: Fri, 16 Mar 2007 15:40:42 +0300 Subject: [PATCH] [PATCH 1/2] Fast TLS on Linux, thread manager part. Merged to resolve conflicts with HARMONY-3397 --- vm/include/open/hycomp.h | 25 +++++++++++ vm/include/open/hythread_ext.h | 2 + vm/thread/src/hythr.def | 1 vm/thread/src/hythr.exp | 2 + vm/thread/src/thread_native_basic.c | 26 +++++------ vm/thread/src/thread_native_tls.c | 80 ++++++++++++++++++++++++++++------- vm/thread/src/thread_private.h | 78 ++++++++++++++++------------------ 7 files changed, 143 insertions(+), 71 deletions(-) diff --git a/vm/include/open/hycomp.h b/vm/include/open/hycomp.h index be9709a..e8d493d 100644 --- a/vm/include/open/hycomp.h +++ b/vm/include/open/hycomp.h @@ -423,4 +423,29 @@ #if !defined(HY_SORT) #define HY_SORT(base, nmemb, size, compare) qsort((base), (nmemb), (size), (compare)) #endif +#ifdef _WIN32 + #define HYTHREAD_FAST_TLS (1) + #define HYTHREAD_FAST_TLS_ATTRIBUTE + #define FS14_TLS_USE (1) +#else + // some kind of nix - the threads internals are known for IA32/Intel64 + // kernels, will use slow way on other HWs (TODO: add IPF support) + #if defined(__linux__) + #if defined(_IA32_) || defined(_EM64T_) + #define HYTHREAD_FAST_TLS (1) + #define HYTHREAD_FAST_TLS_ATTRIBUTE __attribute__((tls_model("initial-exec"))) + #else + #undef HYTHREAD_FAST_TLS + #endif + #else + #undef HYTHREAD_FAST_TLS + #endif +#endif + +#if !defined(HYTHREAD_FAST_TLS) + #define HYTHREAD_FAST_TLS_ATTRIBUTE + #undef FS14_TLS_USE + #define APR_TLS_USE +#endif + #endif /* hycomp_h */ diff --git a/vm/include/open/hythread_ext.h b/vm/include/open/hythread_ext.h index 9d7e4fc..c0e68df 100644 --- a/vm/include/open/hythread_ext.h +++ b/vm/include/open/hythread_ext.h @@ -180,6 +180,8 @@ IDATA VMCALL hythread_set_private_data(h UDATA VMCALL hythread_tls_get_offset(hythread_tls_key_t key); UDATA VMCALL hythread_tls_get_suspend_request_offset(); +UDATA VMCALL 
hythread_uses_fast_tls(void); +IDATA VMCALL hythread_get_hythread_offset_in_tls(void); //@} diff --git a/vm/thread/src/hythr.def b/vm/thread/src/hythr.def index f4387fa..c342cdd 100644 --- a/vm/thread/src/hythr.def +++ b/vm/thread/src/hythr.def @@ -3,6 +3,7 @@ LIBRARY HYTHR EXPORTS hythread_detach +hythread_uses_fast_tls hythread_tls_alloc hythread_sleep_interruptable hythread_cancel diff --git a/vm/thread/src/hythr.exp b/vm/thread/src/hythr.exp index aaf8a32..35526d3 100644 --- a/vm/thread/src/hythr.exp +++ b/vm/thread/src/hythr.exp @@ -36,6 +36,8 @@ hythread_monitor_notify; hythread_get_priority; hythread_tls_get; hythread_tls_get_suspend_request_offset; +hythread_get_hythread_offset_in_tls; +hythread_uses_fast_tls; hythread_tls_get_offset; hythread_global_lock; hythread_global_unlock; diff --git a/vm/thread/src/thread_native_basic.c b/vm/thread/src/thread_native_basic.c index d629dd1..d9ca989 100644 --- a/vm/thread/src/thread_native_basic.c +++ b/vm/thread/src/thread_native_basic.c @@ -17,8 +17,7 @@ /** * @author Nikolay Kuznetsov - * @version $Revision: 1.1.2.13 $ - */ + */ /** * @file thread_native_basic.c @@ -44,16 +43,15 @@ static void* APR_THREAD_FUNC thread_star static hythread_t allocate_thread(); static void reset_thread(hythread_t thread); static IDATA register_to_group(hythread_t thread, hythread_group_t group); -//#define APR_TLS_USE 1 #define NAKED __declspec(naked) -#if !defined (APR_TLS_USE) && !defined (FS14_TLS_USE) -#ifdef PLATFORM_POSIX -__thread hythread_t tm_self_tls = NULL; -#else -__declspec(thread) hythread_t tm_self_tls = NULL; -#endif +#if !defined (APR_TLS_USE) + #if !defined(_WIN32) + __thread hythread_t tm_self_tls HYTHREAD_FAST_TLS_ATTRIBUTE; + #elif !defined(HYTHREAD_FAST_TLS) + __declspec(thread) hythread_t tm_self_tls = NULL; + #endif #endif #define MAX_ID 1000000 @@ -331,7 +329,7 @@ IDATA VMCALL hythread_join_interruptable return hylatch_wait_interruptable(t->join_event, millis, nanos); } -/** +/** * Yield the processor. 
* * @return none @@ -364,11 +362,11 @@ #ifdef APR_TLS_USE hythread_t hythread_self() { hythread_t thread; apr_status_t UNUSED apr_status; - + // Extract hythread_t from TLS - apr_status = apr_threadkey_private_get((void **)(&thread), TM_THREAD_KEY); + apr_status = apr_threadkey_private_get((void **)(&thread), TM_THREAD_KEY); assert(apr_status == APR_SUCCESS); - + return thread; } @@ -376,7 +374,7 @@ static void thread_set_self(hythread_t apr_threadkey_private_set(thread, TM_THREAD_KEY); } #else -#ifdef FS14_TLS_USE +#if defined(_WIN32) && defined(HYTHREAD_FAST_TLS) /** * Return the hythread_t for the current thread. * diff --git a/vm/thread/src/thread_native_tls.c b/vm/thread/src/thread_native_tls.c index d723bd7..49ba075 100644 --- a/vm/thread/src/thread_native_tls.c +++ b/vm/thread/src/thread_native_tls.c @@ -16,6 +16,10 @@ */ /** + * @author Nikolay Kuznetsov + */ + +/** * @file thread_native_tls.c * @brief Hythread TLS related functions */ @@ -33,18 +37,64 @@ int16 tm_tls_size = 0; static void tls_finalizer_placeholder(void *args) {} +/** + * Returns non-zero if a 'fast TLS access' is in use. + * + * The 'fast TLS access' varies depending on platform. + * + * On Windows, the thread manager's structure is stored in the free slot + * of the TIB (see http://www.microsoft.com/msj/archive/S2CE.aspx). + * On Linux, it's stored in app's TLS where it can be retrieved from. + */ +UDATA VMCALL hythread_uses_fast_tls(void) { +#ifdef HYTHREAD_FAST_TLS + return 1; +#else + return 0; +#endif +} + +#if !defined(_WIN32) && defined(HYTHREAD_FAST_TLS) + //TODO: GCC-specific, need to add ICL + #if defined(_EM64T_) + #define HYTHREAD_TLS_GET_VAR_OFFSET(var,offset) \ + { void* tmp; __asm ("movq " #var "@GOTTPOFF(%%rip), %0" : "=r" (tmp)); offset = tmp; } + #elif defined(_IA32_) + #define HYTHREAD_TLS_GET_VAR_OFFSET(var,offset) \ + { __asm ("movl $" #var "@ntpoff, %0" : "=r" (offset)); } + #elif defined(_IPF_) + //TODO: NOTE: not even tested, may require fix. 
+ #define HYTHREAD_TLS_GET_VAR_OFFSET(var,offset) \ + { __asm ("addl %0 = @tprel(" #var "#), r0 ;;\n" : "=r" (offset)) } + #else + #error "Don't know how to get the variable offset in TLS on this platfrom. Try to undef HYTHREAD_FAST_TLS or provide the macros here." + #endif +#else + #define HYTHREAD_TLS_GET_VAR_OFFSET(var, offset) { offset = -1; } +#endif + +/** + * Returns offset of the hythread's control structure in the TLS. + * + * @note Only meaningful on Linux-es \b and when hythread_uses_fast_tls() returns \c true. + */ +IDATA VMCALL hythread_get_hythread_offset_in_tls(void) { + int threadOffset; + HYTHREAD_TLS_GET_VAR_OFFSET(tm_self_tls, threadOffset); + return threadOffset; +} /** * Allocate a thread local storage (TLS) key. - * - * Create and return a new, unique key for thread local storage. - * + * + * Create and return a new, unique key for thread local storage. + * * @note The handle returned will be >=0, so it is safe to test the handle against 0 to see if it's been * allocated yet. - * + * * @param[out] handle pointer to a key to be initialized with a key value * @return 0 on success or negative value if a key could not be allocated (i.e. all TLS has been allocated) - * + * * @see hythread_tls_free, hythread_tls_set */ IDATA VMCALL hythread_tls_alloc(hythread_tls_key_t *handle) { @@ -53,17 +103,17 @@ IDATA VMCALL hythread_tls_alloc(hythread /** * Allocate a thread local storage (TLS) key. - * - * Create and return a new, unique key for thread local storage. - * - * @note The handle returned will be >=0, so it is safe to test the handle against 0 to see if it's been + * + * Create and return a new, unique key for thread local storage. + * + * @note The handle returned will be >=0, so it is safe to test the handle against 0 to see if it's been * allocated yet.
- * + * * @param[out] handle pointer to a key to be initialized with a key value * @param[in] finalizer a finalizer function which will be invoked when a thread is * detached or terminates if the thread's TLS entry for this key is non-NULL * @return 0 on success or negative value if a key could not be allocated (i.e. all TLS has been allocated) - * + * * @see hythread_tls_free, hythread_tls_set */ IDATA VMCALL hythread_tls_alloc_with_finalizer(hythread_tls_key_t *handle, hythread_tls_finalizer_t finalizer) { @@ -85,11 +135,11 @@ void* VMCALL hythread_tls_get(hythread_t /** * Set a thread's TLS value. * - * @param[in] thread a thread + * @param[in] thread a thread * @param[in] key key to have TLS value set (any value returned by hythread_alloc) * @param[in] data value to be stored in TLS * @return 0 on success or negative value on failure - * + * * @see hythread_tls_alloc, hythread_tls_free, hythread_tls_get */ IDATA VMCALL hythread_tls_set(hythread_t thread, hythread_tls_key_t key, void *data) { @@ -100,9 +150,9 @@ IDATA VMCALL hythread_tls_set(hythread_t /** * Release a TLS key. - * + * * Release a TLS key previously allocated by hythread_tls_alloc. 
- * + * * @param[in] key TLS key to be freed * @return 0 on success or negative value on failure * diff --git a/vm/thread/src/thread_private.h b/vm/thread/src/thread_private.h old mode 100644 new mode 100755 index 124d7c4..382ac2e --- a/vm/thread/src/thread_private.h +++ b/vm/thread/src/thread_private.h @@ -18,7 +18,6 @@ #ifndef THREAD_PRIVATE_H #define THREAD_PRIVATE_H #include -//#include #include #include #include @@ -52,27 +51,23 @@ #define SPIN_COUNT 5 #endif // !defined (_IPF_) -#ifdef _EM64T_ +#if defined (_EM64T_) # ifdef _WIN64 - //don't use optimized asm monitor enter and exit helpers + //don't use optimized asm monitor enter and exit helpers +# else + //use optimized asm monitor enter and exit helpers +# define ASM_MONITOR_HELPER +# endif +#elif defined (_IPF_) + //don't use optimized asm monitor enter and exit helpers +#else // _IA32_ +# ifdef WIN32 + //use optimized asm monitor enter and exit helpers +# define ASM_MONITOR_HELPER # else //use optimized asm monitor enter and exit helpers # define ASM_MONITOR_HELPER -#endif -#else -#ifdef _IPF_ - //don't use optimized asm monitor enter and exit helpers -#else - //use optimized asm monitor enter and exit helpers -# define ASM_MONITOR_HELPER -#endif -#endif - -#ifdef WIN32 -// FS14_TLS_USE define turns on windows specific TLS access optimization -// We use free TIB slot with 14 offset, see following article for details -// http://www.microsoft.com/msj/archive/S2CE.aspx -#define FS14_TLS_USE +# endif #endif /* @@ -85,34 +80,33 @@ #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ // optimization code -#if !defined (APR_TLS_USE ) && !defined (FS14_TLS_USE) - -#ifdef PLATFORM_POSIX -extern __thread hythread_t tm_self_tls; -#else -extern __declspec(thread) hythread_t tm_self_tls; -#endif //PLATFORM_POSIX - -#else -#if defined (WIN32) && defined (FS14_TLS_USE) - -__forceinline hythread_t tmn_self_macro() { - register hythread_t t; - _asm { mov eax, fs:[0x14] - mov t, eax; - } - return t; -} +#if defined 
(APR_TLS_USE) +# define tm_self_tls (hythread_self()) -#define store_tm_self(self) (__asm(mov self, fs:[0x14])) -#define tm_self_tls (tmn_self_macro()) -#endif +#elif defined (FS14_TLS_USE) // && !defined (APR_TLS_USE) -#endif +# ifdef WIN32 + __forceinline hythread_t tmn_self_macro() { + register hythread_t t; + _asm { mov eax, fs:[0x14] + mov t, eax; + } + return t; + } + +# define store_tm_self(self) (__asm(mov self, fs:[0x14])) +# define tm_self_tls (tmn_self_macro()) +# endif + +#else // !defined (FS14_TLS_USE) && !defined (APR_TLS_USE) + +# ifdef PLATFORM_POSIX + extern __thread hythread_t tm_self_tls HYTHREAD_FAST_TLS_ATTRIBUTE; +# else // ! PLATFORM_POSIX + extern __declspec(thread) hythread_t tm_self_tls; +# endif // ! PLATFORM_POSIX -#ifdef APR_TLS_USE -#define tm_self_tls (hythread_self()) #endif /** -- 1.4.2