Skip to content

Commit f67439c

Browse files
gbaraldi authored and KristofferC committed
Make have_fma consistent between interpreter and compiled (#52206)
Currently the interpreter always returns false, which isn't very good. Make it follow whatever the JIT will do. (cherry picked from commit a6c656e)
1 parent 0218599 commit f67439c

File tree

9 files changed

+49
-5
lines changed

9 files changed

+49
-5
lines changed

src/jl_exported_funcs.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@
214214
XX(jl_get_binding_wr) \
215215
XX(jl_get_cpu_name) \
216216
XX(jl_get_cpu_features) \
217+
XX(jl_cpu_has_fma) \
217218
XX(jl_get_current_task) \
218219
XX(jl_get_default_sysimg_path) \
219220
XX(jl_get_excstack) \

src/llvm-cpufeatures.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,15 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS
6060
StringRef FS =
6161
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();
6262

63-
SmallVector<StringRef, 6> Features;
63+
SmallVector<StringRef, 128> Features;
6464
FS.split(Features, ',');
6565
for (StringRef Feature : Features)
6666
if (TT.isARM()) {
6767
if (Feature == "+vfp4")
6868
return typ == "f32" || typ == "f64";
6969
else if (Feature == "+vfp4sp")
7070
return typ == "f32";
71-
} else {
71+
} else if (TT.isX86()) {
7272
if (Feature == "+fma" || Feature == "+fma4")
7373
return typ == "f32" || typ == "f64";
7474
}

src/processor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
224224
// Return the features of the host CPU as a julia string.
225225
JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
226226
// Dump the name and feature set of the host CPU
227+
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits);
228+
// Check if the CPU has native FMA instructions;
227229
// For debugging only
228230
JL_DLLEXPORT void jl_dump_host_cpu(void);
229231
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data);

src/processor_arm.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1808,6 +1808,22 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
18081808
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
18091809
}
18101810

1811+
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
1812+
{
1813+
#ifdef _CPU_AARCH64_
1814+
return jl_true;
1815+
#else
1816+
TargetData<feature_sz> target = jit_targets.front();
1817+
FeatureList<feature_sz> features = target.en.features;
1818+
if (bits == 32 && test_nbit(features, Feature::vfp4sp))
1819+
return jl_true;
1820+
else if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4))
1821+
return jl_true;
1822+
else
1823+
return jl_false;
1824+
#endif
1825+
}
1826+
18111827
jl_image_t jl_init_processor_sysimg(void *hdl)
18121828
{
18131829
if (!jit_targets.empty())

src/processor_fallback.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
172172
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
173173
}
174174

175+
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
176+
{
177+
return jl_false; // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false)
178+
}
179+
175180
JL_DLLEXPORT void jl_dump_host_cpu(void)
176181
{
177182
jl_safe_printf("CPU: %s\n", host_cpu_name().c_str());

src/processor_x86.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
// CPUID
66

7+
#include "julia.h"
78
extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
89
{
910
asm volatile (
@@ -1055,6 +1056,16 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
10551056
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
10561057
}
10571058

1059+
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
1060+
{
1061+
TargetData<feature_sz> target = jit_targets.front();
1062+
FeatureList<feature_sz> features = target.en.features;
1063+
if ((bits == 32 || bits == 64) && (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4)))
1064+
return jl_true;
1065+
else
1066+
return jl_false;
1067+
}
1068+
10581069
jl_image_t jl_init_processor_sysimg(void *hdl)
10591070
{
10601071
if (!jit_targets.empty())

src/runtime_intrinsics.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,7 @@ un_fintrinsic(trunc_float,trunc_llvm)
14541454
un_fintrinsic(rint_float,rint_llvm)
14551455
un_fintrinsic(sqrt_float,sqrt_llvm)
14561456
un_fintrinsic(sqrt_float,sqrt_llvm_fast)
1457+
jl_value_t *jl_cpu_has_fma(int bits);
14571458

14581459
JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
14591460
{
@@ -1463,7 +1464,11 @@ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
14631464

14641465
JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
14651466
{
1466-
JL_TYPECHK(have_fma, datatype, typ);
1467-
// TODO: run-time feature check?
1468-
return jl_false;
1467+
JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16?
1468+
if (typ == (jl_value_t*)jl_float32_type)
1469+
return jl_cpu_has_fma(32);
1470+
else if (typ == (jl_value_t*)jl_float64_type)
1471+
return jl_cpu_has_fma(64);
1472+
else
1473+
return jl_false;
14691474
}

test/llvmpasses/cpu-features.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s
77
; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
8+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
9+
target triple = "x86_64-linux-gnu"
810

911
declare i1 @julia.cpu.have_fma.f64()
1012
declare double @with_fma(double %0, double %1, double %2)

test/sysinfo.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Base.Sys.loadavg()
1212

1313
@test length(ccall(:jl_get_cpu_name, String, ())) != 0
1414
@test length(ccall(:jl_get_cpu_features, String, ())) >= 0
15+
foo_fma() = Core.Intrinsics.have_fma(Int64)
16+
@test ccall(:jl_cpu_has_fma, Bool, (Cint,), 64) == foo_fma()
1517

1618
if Sys.isunix()
1719
mktempdir() do tempdir

0 commit comments

Comments
 (0)