-
Notifications
You must be signed in to change notification settings - Fork 13.8k
Use `load`+`store` instead of `memcpy` for small integer arrays
#111999
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -380,7 +380,19 @@ pub fn memcpy_ty<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( | |
return; | ||
} | ||
|
||
bx.memcpy(dst, dst_align, src, src_align, bx.cx().const_usize(size), flags); | ||
if flags == MemFlags::empty() | ||
&& let Some(bty) = bx.cx().scalar_copy_backend_type(layout) | ||
{ | ||
// I look forward to only supporting opaque pointers | ||
|
||
let pty = bx.type_ptr_to(bty); | ||
let src = bx.pointercast(src, pty); | ||
let dst = bx.pointercast(dst, pty); | ||
|
||
|
||
let temp = bx.load(bty, src, src_align); | ||
bx.store(temp, dst, dst_align); | ||
} else { | ||
bx.memcpy(dst, dst_align, src, src_align, bx.cx().const_usize(size), flags); | ||
} | ||
} | ||
|
||
pub fn codegen_instance<'a, 'tcx: 'a, Bx: BuilderMethods<'a, 'tcx>>( | ||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -126,6 +126,28 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> { | |||||||||
index: usize, | ||||||||||
immediate: bool, | ||||||||||
) -> Self::Type; | ||||||||||
|
||||||||||
/// A type that can be used in a [`super::BuilderMethods::load`] + | ||||||||||
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy, | ||||||||||
/// such as a MIR `*_0 = *_1`. | ||||||||||
/// | ||||||||||
/// It's always legal to return `None` here, as the provided impl does, | ||||||||||
/// in which case callers should use [`super::BuilderMethods::memcpy`] | ||||||||||
/// instead of the `load`+`store` pair. | ||||||||||
/// | ||||||||||
/// This can be helpful for things like arrays, where the LLVM backend type | ||||||||||
/// `[3 x i16]` optimizes to three separate loads and stores, but it can | ||||||||||
/// instead be copied via an `i48` that stays as the single `load`+`store`. | ||||||||||
/// (As of 2023-05 LLVM cannot necessarily optimize away a `memcpy` in these | ||||||||||
/// cases, due to `poison` handling, but in codegen we have more information | ||||||||||
/// about the type invariants, so can emit something better instead.) | ||||||||||
/// | ||||||||||
/// This *should* return `None` for particularly-large types, where leaving | ||||||||||
/// the `memcpy` may well be important to avoid code size explosion. | ||||||||||
fn scalar_copy_backend_type(&self, layout: TyAndLayout<'tcx>) -> Option<Self::Type> { | ||||||||||
|
/// Builds the "poison" constant of type `typ` by delegating to `const_undef`. | |
/// NOTE(review): the `'gcc` lifetimes suggest this is the GCC backend's impl — confirm. | |
fn const_poison(&self, typ: Type<'gcc>) -> RValue<'gcc> { | |
// No distinction between undef and poison here, so the undef constant is reused. | |
self.const_undef(typ) | |
} |
so indeed it might just never need to do this.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// compile-flags: -O -C no-prepopulate-passes | ||
// min-llvm-version: 15.0 (for opaque pointers) | ||
|
||
#![crate_type = "lib"] | ||
|
||
// CHECK-LABEL: @array_load | ||
// Returns `*a` by value. The expected IR copies the array into a local slot | ||
// with one `<4 x i8>` load/store pair, then reloads that slot as `i32` for the return. | ||
#[no_mangle] | ||
pub fn array_load(a: &[u8; 4]) -> [u8; 4] { | ||
// CHECK: %0 = alloca [4 x i8], align 1 | ||
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1 | ||
// CHECK: store <4 x i8> %[[TEMP1]], ptr %0, align 1 | ||
// CHECK: %[[TEMP2:.+]] = load i32, ptr %0, align 1 | ||
// CHECK: ret i32 %[[TEMP2]] | ||
*a | ||
} | ||
|
||
// CHECK-LABEL: @array_store | ||
// Writes the by-value array `a` through `p`. The expected IR is a single | ||
// `<4 x i8>` load from the `%a` stack slot immediately followed by the store to `%p`. | ||
#[no_mangle] | ||
pub fn array_store(a: [u8; 4], p: &mut [u8; 4]) { | ||
// CHECK: %a = alloca [4 x i8] | ||
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1 | ||
// CHECK-NEXT: store <4 x i8> %[[TEMP]], ptr %p, align 1 | ||
*p = a; | ||
} | ||
|
||
// CHECK-LABEL: @array_copy | ||
// Copies `*a` into `*p`. The expected IR goes through a local temporary: | ||
// one `<4 x i8>` load/store pair from `%a` into the local, then another from the local into `%p`. | ||
#[no_mangle] | ||
pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) { | ||
// CHECK: %[[LOCAL:.+]] = alloca [4 x i8], align 1 | ||
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1 | ||
// CHECK: store <4 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1 | ||
// CHECK: %[[TEMP2:.+]] = load <4 x i8>, ptr %[[LOCAL]], align 1 | ||
// CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1 | ||
*p = *a; | ||
} |
Uh oh!
There was an error while loading. Please reload this page.