32
32
#include <sys/blake3.h>
33
33
#include <sys/sha2.h>
34
34
35
- /* limit benchmarking to max 256KiB, when EdonR is slower then this: */
36
- #define LIMIT_PERF_MBS 300
37
-
38
35
typedef struct {
39
36
const char * name ;
40
37
const char * impl ;
@@ -52,9 +49,15 @@ typedef struct {
52
49
zio_checksum_tmpl_free_t * (free );
53
50
} chksum_stat_t ;
54
51
52
+ #define AT_STARTUP 0
53
+ #define AT_BENCHMARK 1
54
+ #define AT_DONE 2
55
+
55
56
static chksum_stat_t * chksum_stat_data = 0 ;
56
- static int chksum_stat_cnt = 0 ;
57
57
static kstat_t * chksum_kstat = NULL ;
58
+ static int chksum_stat_limit = AT_STARTUP ;
59
+ static int chksum_stat_cnt = 0 ;
60
+ static void chksum_benchmark (void );
58
61
59
62
/*
60
63
* Sample output on i3-1005G1 System:
@@ -129,6 +132,9 @@ chksum_kstat_data(char *buf, size_t size, void *data)
129
132
static void *
130
133
chksum_kstat_addr (kstat_t * ksp , loff_t n )
131
134
{
135
+ /* trigger the full benchmark; returns immediately once it is done */
136
+ chksum_benchmark ();
137
+
132
138
if (n < chksum_stat_cnt )
133
139
ksp -> ks_private = (void * )(chksum_stat_data + n );
134
140
else
@@ -176,47 +182,36 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
176
182
kpreempt_enable ();
177
183
178
184
run_bw = size * run_count * NANOSEC ;
179
- run_bw /= run_time_ns ; /* B/s */
185
+ run_bw /= run_time_ns ; /* B/s */
180
186
* result = run_bw /1024 /1024 ; /* MiB/s */
181
187
}
182
188
183
- #define LIMIT_INIT 0
184
- #define LIMIT_NEEDED 1
185
- #define LIMIT_NOLIMIT 2
186
-
187
189
static void
188
190
chksum_benchit (chksum_stat_t * cs )
189
191
{
190
192
abd_t * abd ;
191
193
void * ctx = 0 ;
192
194
void * salt = & cs -> salt .zcs_bytes ;
193
- static int chksum_stat_limit = LIMIT_INIT ;
194
195
195
196
memset (salt , 0 , sizeof (cs -> salt .zcs_bytes ));
196
197
if (cs -> init )
197
198
ctx = cs -> init (& cs -> salt );
198
199
200
+ /* in startup mode, run only the 256KiB benchmark */
201
+ if (chksum_stat_limit == AT_STARTUP ) {
202
+ abd = abd_alloc_linear (1 <<18 , B_FALSE );
203
+ chksum_run (cs , abd , ctx , 5 , & cs -> bs256k );
204
+ goto done ;
205
+ }
206
+
199
207
/* allocate test memory via abd linear interface */
200
208
abd = abd_alloc_linear (1 <<20 , B_FALSE );
209
+
210
+ /* the remaining block sizes are benchmarked only when requested */
201
211
chksum_run (cs , abd , ctx , 1 , & cs -> bs1k );
202
212
chksum_run (cs , abd , ctx , 2 , & cs -> bs4k );
203
213
chksum_run (cs , abd , ctx , 3 , & cs -> bs16k );
204
214
chksum_run (cs , abd , ctx , 4 , & cs -> bs64k );
205
- chksum_run (cs , abd , ctx , 5 , & cs -> bs256k );
206
-
207
- /* check if we ran on a slow cpu */
208
- if (chksum_stat_limit == LIMIT_INIT ) {
209
- if (cs -> bs1k < LIMIT_PERF_MBS ) {
210
- chksum_stat_limit = LIMIT_NEEDED ;
211
- } else {
212
- chksum_stat_limit = LIMIT_NOLIMIT ;
213
- }
214
- }
215
-
216
- /* skip benchmarks >= 1MiB when the CPU is to slow */
217
- if (chksum_stat_limit == LIMIT_NEEDED )
218
- goto abort ;
219
-
220
215
chksum_run (cs , abd , ctx , 6 , & cs -> bs1m );
221
216
abd_free (abd );
222
217
@@ -225,7 +220,7 @@ chksum_benchit(chksum_stat_t *cs)
225
220
chksum_run (cs , abd , ctx , 7 , & cs -> bs4m );
226
221
chksum_run (cs , abd , ctx , 8 , & cs -> bs16m );
227
222
228
- abort :
223
+ done :
229
224
abd_free (abd );
230
225
231
226
/* free up temp memory */
@@ -243,16 +238,21 @@ chksum_benchmark(void)
243
238
/* we need the benchmark only for the kernel module */
244
239
return ;
245
240
#endif
246
-
247
241
chksum_stat_t * cs ;
248
242
uint64_t max ;
249
243
uint32_t id , cbid = 0 , id_save ;
250
244
const zfs_impl_t * blake3 = zfs_impl_get_ops ("blake3" );
251
245
const zfs_impl_t * sha256 = zfs_impl_get_ops ("sha256" );
252
246
const zfs_impl_t * sha512 = zfs_impl_get_ops ("sha512" );
253
247
248
+ /* all benchmarks have already been run, nothing to do */
249
+ if (chksum_stat_limit == AT_DONE )
250
+ return ;
251
+
252
+
254
253
/* count implementations */
255
- chksum_stat_cnt = 2 ;
254
+ chksum_stat_cnt = 1 ; /* edonr */
255
+ chksum_stat_cnt += 1 ; /* skein */
256
256
chksum_stat_cnt += sha256 -> getcnt ();
257
257
chksum_stat_cnt += sha512 -> getcnt ();
258
258
chksum_stat_cnt += blake3 -> getcnt ();
@@ -332,6 +332,17 @@ chksum_benchmark(void)
332
332
}
333
333
}
334
334
blake3 -> setid (id_save );
335
+
336
+ switch (chksum_stat_limit ) {
337
+ case AT_STARTUP :
338
+ /* next time we want a full benchmark */
339
+ chksum_stat_limit = AT_BENCHMARK ;
340
+ break ;
341
+ case AT_BENCHMARK :
342
+ /* no further benchmarks */
343
+ chksum_stat_limit = AT_DONE ;
344
+ break ;
345
+ }
335
346
}
336
347
337
348
void
@@ -341,7 +352,7 @@ chksum_init(void)
341
352
blake3_per_cpu_ctx_init ();
342
353
#endif
343
354
344
- /* Benchmark supported implementations */
355
+ /* At init, benchmark only the 256KiB block size */
345
356
chksum_benchmark ();
346
357
347
358
/* Install kstats for all implementations */
0 commit comments