Commit a357ff1

Improve ARM64 atomics for Clang (#4870)
1 parent 8657d15 commit a357ff1

File tree

2 files changed: +33 -45 lines changed

stl/inc/atomic

Lines changed: 28 additions & 45 deletions
@@ -33,18 +33,29 @@ _STL_DISABLE_CLANG_WARNINGS
 #pragma clang attribute _STD_ATOMIC_HEADER.push([[gnu::target("cx16")]], apply_to = function)
 #endif // ^^^ defined(__clang__) && defined(_M_X64) ^^^
 
-// Controls whether ARM64 ldar/ldapr/stlr should be used
-#ifndef _STD_ATOMIC_USE_ARM64_LDAR_STLR
 #if defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64)
-#ifdef __clang__ // TRANSITION, LLVM-62103
-#define _STD_ATOMIC_USE_ARM64_LDAR_STLR 0
-#else // ^^^ Clang doesn't support new intrinsics / __load_acquire/__stlr intrinsics are available vvv
 #define _STD_ATOMIC_USE_ARM64_LDAR_STLR 1
-#endif // ^^^ __load_acquire/__stlr intrinsics are available ^^^
+#ifdef __clang__
+#define __LOAD_ACQUIRE_ARM64(_Width, _Ptr) \
+    static_cast<__int##_Width>(__atomic_load_n(reinterpret_cast<const volatile unsigned __int##_Width*>(_Ptr), 2))
+#define __STORE_RELEASE(_Width, _Ptr, _Desired) \
+    _Compiler_barrier(); \
+    __atomic_store_n( \
+        reinterpret_cast<volatile unsigned __int##_Width*>(_Ptr), static_cast<unsigned __int##_Width>(_Desired), 3)
+#else // ^^^ Clang / MSVC vvv
+#define __LOAD_ACQUIRE_ARM64(_Width, _Ptr) \
+    static_cast<__int##_Width>(__load_acquire##_Width(reinterpret_cast<const volatile unsigned __int##_Width*>(_Ptr)))
+#define __STORE_RELEASE(_Width, _Ptr, _Desired) \
+    _Compiler_barrier(); \
+    __stlr##_Width( \
+        reinterpret_cast<volatile unsigned __int##_Width*>(_Ptr), static_cast<unsigned __int##_Width>(_Desired))
+#endif // ^^^ MSVC ^^^
 #else // ^^^ ARM64/ARM64EC/HYBRID_X86_ARM64 / Other architectures vvv
 #define _STD_ATOMIC_USE_ARM64_LDAR_STLR 0
+#define __STORE_RELEASE(_Width, _Ptr, _Desired) \
+    _Compiler_or_memory_barrier(); \
+    __iso_volatile_store##_Width((_Ptr), (_Desired))
 #endif // ^^^ Other architectures ^^^
-#endif // _STD_ATOMIC_USE_ARM64_LDAR_STLR
 
 #define ATOMIC_BOOL_LOCK_FREE 2
 #define ATOMIC_CHAR_LOCK_FREE 2
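
Aside: the Clang branch above passes the raw values 2 and 3 as the memory-order arguments of the __atomic builtins. A minimal sketch of what those numbers correspond to, assuming a Clang/GCC-style compiler; the helper names below are illustrative, not part of the STL:

#include <cstdint>

// 2 == __ATOMIC_ACQUIRE, the order __LOAD_ACQUIRE_ARM64 requests
inline std::int32_t load_acquire_32(const volatile std::int32_t* ptr) noexcept {
    return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
}

// 3 == __ATOMIC_RELEASE, the order __STORE_RELEASE requests
inline void store_release_32(volatile std::int32_t* ptr, std::int32_t desired) noexcept {
    __atomic_store_n(ptr, desired, __ATOMIC_RELEASE);
}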
@@ -122,9 +133,6 @@ extern "C" inline void _Check_memory_order(const unsigned int _Order) noexcept {
 
 #if _STD_ATOMIC_USE_ARM64_LDAR_STLR == 1
 
-#define __LOAD_ACQUIRE_ARM64(_Width, _Ptr) \
-    static_cast<__int##_Width>(__load_acquire##_Width(reinterpret_cast<const volatile unsigned __int##_Width*>(_Ptr)))
-
 #define _ATOMIC_LOAD_ARM64(_Result, _Width, _Ptr, _Order_var) \
     switch (_Order_var) { \
     case _Atomic_memory_order_relaxed: \
@@ -162,27 +170,12 @@ extern "C" inline void _Check_memory_order(const unsigned int _Order) noexcept {
         break; \
     }
 
-#if _STD_ATOMIC_USE_ARM64_LDAR_STLR == 1
-
-#define __STORE_RELEASE(_Width, _Ptr, _Desired) \
-    _Compiler_barrier(); \
-    __stlr##_Width( \
-        reinterpret_cast<volatile unsigned __int##_Width*>(_Ptr), static_cast<unsigned __int##_Width>(_Desired));
-
-#else // ^^^ _STD_ATOMIC_USE_ARM64_LDAR_STLR == 1 / _STD_ATOMIC_USE_ARM64_LDAR_STLR == 0 vvv
-
-#define __STORE_RELEASE(_Width, _Ptr, _Desired) \
-    _Compiler_or_memory_barrier(); \
-    __iso_volatile_store##_Width((_Ptr), (_Desired));
-
-#endif // ^^^ _STD_ATOMIC_USE_ARM64_LDAR_STLR == 0 ^^^
-
 #define _ATOMIC_STORE_PREFIX(_Width, _Ptr, _Desired) \
     case _Atomic_memory_order_relaxed: \
         __iso_volatile_store##_Width((_Ptr), (_Desired)); \
         return; \
     case _Atomic_memory_order_release: \
-        __STORE_RELEASE(_Width, _Ptr, _Desired) \
+        __STORE_RELEASE(_Width, _Ptr, _Desired); \
         return; \
     default: \
     case _Atomic_memory_order_consume: \
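
Aside: this hunk also moves the trailing semicolon from the __STORE_RELEASE definition to its use sites, so the macro now reads like an ordinary expression statement. A small self-contained sketch of the same convention, using std::atomic_ref (C++20) and illustrative names rather than the STL's internals:

#include <atomic>

// The macro body deliberately ends without ';' so that every caller writes one.
#define MY_STORE_RELEASE(ptr, desired) \
    std::atomic_ref<int>(*(ptr)).store((desired), std::memory_order_release)

void store_with_order(int* ptr, int desired, std::memory_order order) {
    switch (order) {
    case std::memory_order_relaxed:
        std::atomic_ref<int>(*ptr).store(desired, std::memory_order_relaxed);
        return;
    case std::memory_order_release:
        MY_STORE_RELEASE(ptr, desired); // call site supplies the ';'
        return;
    default:
        std::atomic_ref<int>(*ptr).store(desired, std::memory_order_seq_cst);
        return;
    }
}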
@@ -196,15 +189,9 @@ extern "C" inline void _Check_memory_order(const unsigned int _Order) noexcept {
     __iso_volatile_store##_Width((_Ptr), (_Desired)); \
     _Memory_barrier();
 
-#if _STD_ATOMIC_USE_ARM64_LDAR_STLR == 1
-#define _ATOMIC_STORE_SEQ_CST_ARM64(_Width, _Ptr, _Desired) \
-    _Compiler_barrier(); \
-    __stlr##_Width( \
-        reinterpret_cast<volatile unsigned __int##_Width*>(_Ptr), static_cast<unsigned __int##_Width>(_Desired)); \
+#define _ATOMIC_STORE_SEQ_CST_ARM64(_Width, _Ptr, _Desired) \
+    __STORE_RELEASE(_Width, _Ptr, _Desired); \
     _Memory_barrier();
-#else // ^^^ _STD_ATOMIC_USE_ARM64_LDAR_STLR == 1 / _STD_ATOMIC_USE_ARM64_LDAR_STLR == 0 vvv
-#define _ATOMIC_STORE_SEQ_CST_ARM64 _ATOMIC_STORE_SEQ_CST_ARM
-#endif // ^^^ _STD_ATOMIC_USE_ARM64_LDAR_STLR == 0 ^^^
 
 #define _ATOMIC_STORE_SEQ_CST_X86_X64(_Width, _Ptr, _Desired) (void) _InterlockedExchange##_Width((_Ptr), (_Desired));
 #define _ATOMIC_STORE_32_SEQ_CST_X86_X64(_Ptr, _Desired) \
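
Aside: after this change the sequentially consistent ARM64 store is expressed as the release store followed by a full barrier. In portable C++ terms the shape is roughly the following sketch; the function name is illustrative, not the STL's implementation:

#include <atomic>

void store_seq_cst_like(std::atomic<int>& obj, int desired) {
    obj.store(desired, std::memory_order_release);       // typically an stlr on ARM64
    std::atomic_thread_fence(std::memory_order_seq_cst); // typically a dmb ish on ARM64
}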
@@ -257,7 +244,11 @@ extern "C" inline void _Atomic_thread_fence(const unsigned int _Order) noexcept
         _Compiler_barrier();
     }
 #elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64)
-    _Memory_barrier();
+    if (_Order == _Atomic_memory_order_acquire || _Order == _Atomic_memory_order_consume) {
+        _Memory_load_acquire_barrier();
+    } else {
+        _Memory_barrier();
+    }
 #else // ^^^ ARM32/ARM64/ARM64EC/HYBRID_X86_ARM64 / unsupported hardware vvv
 #error Unsupported hardware
 #endif // ^^^ unsupported hardware ^^^
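
Aside: the fence change above matters for the common publish/consume pattern, where a relaxed load is followed by an acquire fence; on ARM64 that fence can now be the cheaper load-only barrier. A minimal sketch of that pattern in standard C++, with illustrative names:

#include <atomic>

std::atomic<bool> ready{false};
int payload = 0;

int wait_for_payload() {
    while (!ready.load(std::memory_order_relaxed)) {
        // spin until the writer publishes
    }
    std::atomic_thread_fence(std::memory_order_acquire); // pairs with the writer's release store of ready
    return payload;
}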
@@ -519,7 +510,7 @@ inline void _Atomic_lock_acquire(long& _Spinlock) noexcept {
         }
     }
 #elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64)
-    while (_InterlockedExchange(&_Spinlock, 1) != 0) { // TRANSITION, GH-1133: _InterlockedExchange_acq
+    while (_InterlockedExchange_acq(&_Spinlock, 1) != 0) {
         while (__iso_volatile_load32(&reinterpret_cast<int&>(_Spinlock)) != 0) {
             __yield();
         }
@@ -530,15 +521,7 @@ inline void _Atomic_lock_acquire(long& _Spinlock) noexcept {
 }
 
 inline void _Atomic_lock_release(long& _Spinlock) noexcept {
-#if (defined(_M_IX86) && !defined(_M_HYBRID_X86_ARM64)) || (defined(_M_X64) && !defined(_M_ARM64EC))
-    _InterlockedExchange(&_Spinlock, 0); // TRANSITION, GH-1133: same as ARM
-#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64)
-    _Memory_barrier();
-    __iso_volatile_store32(reinterpret_cast<int*>(&_Spinlock), 0);
-    _Memory_barrier(); // TRANSITION, GH-1133: remove
-#else // ^^^ defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64) ^^^
-#error Unsupported hardware
-#endif
+    __STORE_RELEASE(32, reinterpret_cast<int*>(&_Spinlock), 0);
 }
 
 inline void _Atomic_lock_acquire(_Smtx_t* _Spinlock) noexcept {
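
Aside: taken together, the two spinlock hunks make the lock acquire-ordered on the way in and release-ordered on the way out, instead of relying on full barriers. A standard C++ sketch of the same pattern with illustrative names; the STL uses _InterlockedExchange_acq, __iso_volatile_load32, __yield, and __STORE_RELEASE instead:

#include <atomic>

struct Spinlock {
    std::atomic<long> state{0};

    void lock() noexcept {
        while (state.exchange(1, std::memory_order_acquire) != 0) { // acquire-ordered RMW, like _InterlockedExchange_acq
            while (state.load(std::memory_order_relaxed) != 0) {    // cheap spin, like __iso_volatile_load32
                // a real implementation would pause or yield here, as the STL does with __yield()
            }
        }
    }

    void unlock() noexcept {
        state.store(0, std::memory_order_release); // release-ordered store, like __STORE_RELEASE(32, ...)
    }
};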

stl/inc/xatomic.h

Lines changed: 5 additions & 0 deletions
@@ -58,6 +58,11 @@ _STL_DISABLE_CLANG_WARNINGS
 #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64)
 #define _Memory_barrier() __dmb(0xB) // inner shared data memory barrier
 #define _Compiler_or_memory_barrier() _Memory_barrier()
+#if defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64)
+#define _Memory_load_acquire_barrier() __dmb(0x9) // inner shared data memory load barrier
+#else // ^^^ ARM64/ARM64EC/HYBRID_X86_ARM64 / ARM32 vvv
+#define _Memory_load_acquire_barrier() _Memory_barrier()
+#endif // ^^^ ARM32 ^^^
 #elif defined(_M_IX86) || defined(_M_X64)
 // x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics
 #define _Compiler_or_memory_barrier() _Compiler_barrier()
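
Aside: the __dmb argument values here encode the ARM barrier domain and type, as the comments note: 0xB is DMB ISH (inner shareable, full barrier) and 0x9 is DMB ISHLD (inner shareable, loads only). A hypothetical helper showing how the new macro would be used to give a plain load acquire-like ordering; the helper name is not part of the STL:

inline int _Hypothetical_load_acquire32(const volatile int* _Ptr) noexcept {
    const int _Value = __iso_volatile_load32(_Ptr); // plain, unordered load
    _Memory_load_acquire_barrier(); // dmb ishld: keeps the load above ordered before later loads and stores
    return _Value;
}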
