Skip to content

Commit f5d494b

Browse files
committed
runtime: ensure GC sees type-safe memory on weak machines
Currently it's possible for the garbage collector to observe uninitialized memory or stale heap bitmap bits on weakly ordered architectures such as ARM and PPC. On such architectures, the stores that zero newly allocated memory and initialize its heap bitmap may move after a store in user code that makes the allocated object observable by the garbage collector. To fix this, add a "publication barrier" (also known as an "export barrier") before returning from mallocgc. This is a store/store barrier that ensures any write done by user code that makes the returned object observable to the garbage collector will be ordered after the initialization performed by mallocgc. No barrier is necessary on the reading side because of the data dependency between loading the pointer and loading the contents of the object. Fixes one of the issues raised in #9984. Change-Id: Ia3d96ad9c5fc7f4d342f5e05ec0ceae700cd17c8 Reviewed-on: https://go-review.googlesource.com/11083 Reviewed-by: Rick Hudson <[email protected]> Reviewed-by: Dmitry Vyukov <[email protected]> Reviewed-by: Minux Ma <[email protected]> Reviewed-by: Martin Capitanio <[email protected]> Reviewed-by: Russ Cox <[email protected]>
1 parent 75ce330 commit f5d494b

14 files changed

+96
-0
lines changed

src/runtime/asm_386.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,11 @@ TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
632632
ANDB BX, (AX)
633633
RET
634634

635+
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
636+
// Stores are already ordered on x86, so this is just a
637+
// compile barrier.
638+
RET
639+
635640
// void jmpdefer(fn, sp);
636641
// called from deferreturn.
637642
// 1. pop the caller

src/runtime/asm_amd64.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,11 @@ TEXT runtime·atomicand8(SB), NOSPLIT, $0-9
615615
ANDB BX, (AX)
616616
RET
617617

618+
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
619+
// Stores are already ordered on x86, so this is just a
620+
// compile barrier.
621+
RET
622+
618623
// void jmpdefer(fn, sp);
619624
// called from deferreturn.
620625
// 1. pop the caller

src/runtime/asm_amd64p32.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,11 @@ TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
569569
ANDB AX, 0(BX)
570570
RET
571571

572+
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
573+
// Stores are already ordered on x86, so this is just a
574+
// compile barrier.
575+
RET
576+
572577
// void jmpdefer(fn, sp);
573578
// called from deferreturn.
574579
// 1. pop the caller

src/runtime/asm_arm.s

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,17 @@ TEXT runtime·atomicloaduint(SB),NOSPLIT,$0-8
736736
TEXT runtime·atomicstoreuintptr(SB),NOSPLIT,$0-8
737737
B runtime·atomicstore(SB)
738738

739+
// armPublicationBarrier is a native store/store barrier for ARMv7+.
740+
// To implement publicationBarrier in sys_$GOOS_arm.s using the native
741+
// instructions, use:
742+
//
743+
// TEXT ·publicationBarrier(SB),NOSPLIT,$-4-0
744+
// B runtime·armPublicationBarrier(SB)
745+
//
746+
TEXT runtime·armPublicationBarrier(SB),NOSPLIT,$-4-0
747+
WORD $0xf57ff05e // DMB ST
748+
RET
749+
739750
// AES hashing not implemented for ARM
740751
TEXT runtime·aeshash(SB),NOSPLIT,$-4-0
741752
MOVW $0, R0

src/runtime/atomic_arm64.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,7 @@ again:
111111

112112
TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
113113
B runtime·xchg64(SB)
114+
115+
TEXT ·publicationBarrier(SB),NOSPLIT,$-8-0
116+
DMB $0xe // DMB ST
117+
RET

src/runtime/atomic_ppc64x.s

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,10 @@ TEXT ·atomicloadp(SB),NOSPLIT,$-8-16
3838
ISYNC
3939
MOVD R3, ret+8(FP)
4040
RET
41+
42+
TEXT ·publicationBarrier(SB),NOSPLIT,$-8-0
43+
// LWSYNC is the "export" barrier recommended by Power ISA
44+
// v2.07 book II, appendix B.2.2.2.
45+
// LWSYNC is a load/load, load/store, and store/store barrier.
46+
WORD $0x7c2004ac // LWSYNC
47+
RET

src/runtime/malloc.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,14 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
657657
} else {
658658
c.local_scan += typ.ptrdata
659659
}
660+
661+
// Ensure that the stores above that initialize x to
662+
// type-safe memory and set the heap bits occur before
663+
// the caller can make x observable to the garbage
664+
// collector. Otherwise, on weakly ordered machines,
665+
// the garbage collector could follow a pointer to x,
666+
// but see uninitialized memory or stale heap bits.
667+
publicationBarrier()
660668
}
661669

662670
// GCmarkterminate allocates black

src/runtime/stubs.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,23 @@ func xaddint64(ptr *int64, delta int64) int64 {
167167
return int64(xadd64((*uint64)(unsafe.Pointer(ptr)), delta))
168168
}
169169

170+
// publicationBarrier performs a store/store barrier (a "publication"
171+
// or "export" barrier). Some form of synchronization is required
172+
// between initializing an object and making that object accessible to
173+
// another processor. Without synchronization, the initialization
174+
// writes and the "publication" write may be reordered, allowing the
175+
// other processor to follow the pointer and observe an uninitialized
176+
// object. In general, higher-level synchronization should be used,
177+
// such as locking or an atomic pointer write. publicationBarrier is
178+
// for when those aren't an option, such as in the implementation of
179+
// the memory manager.
180+
//
181+
// There's no corresponding barrier for the read side because the read
182+
// side naturally has a data dependency order. All architectures that
183+
// Go supports or seems likely to ever support automatically enforce
184+
// data dependency ordering.
185+
func publicationBarrier()
186+
170187
//go:noescape
171188
func setcallerpc(argp unsafe.Pointer, pc uintptr)
172189

src/runtime/sys_darwin_arm.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,9 @@ TEXT runtime·cas(SB),NOSPLIT,$0
301301
TEXT runtime·casp1(SB),NOSPLIT,$0
302302
B runtime·cas(SB)
303303

304+
TEXT ·publicationBarrier(SB),NOSPLIT,$-4-0
305+
B runtime·armPublicationBarrier(SB)
306+
304307
TEXT runtime·sysctl(SB),NOSPLIT,$0
305308
MOVW mib+0(FP), R0
306309
MOVW miblen+4(FP), R1

src/runtime/sys_freebsd_arm.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,10 @@ TEXT runtime·casp1(SB),NOSPLIT,$0
381381
TEXT runtime·cas(SB),NOSPLIT,$0
382382
B runtime·armcas(SB)
383383

384+
// TODO: this is only valid for ARMv7+
385+
TEXT ·publicationBarrier(SB),NOSPLIT,$-4-0
386+
B runtime·armPublicationBarrier(SB)
387+
384388
// TODO(minux): this only supports ARMv6K+.
385389
TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
386390
WORD $0xee1d0f70 // mrc p15, 0, r0, c13, c0, 3

0 commit comments

Comments
 (0)