@@ -261,30 +261,37 @@ private:
261
261
vector<result_type> _Myvec;
262
262
};
263
263
264
- _NODISCARD constexpr int _Generate_canonical_iterations(const int _Bits, const uint64_t _Gmin, const uint64_t _Gmax) {
265
- // For a URBG type `G` with range == `(G::max() - G::min()) + 1`, returns the number of calls to generate at least
266
- // _Bits bits of entropy. Specifically, max(1, ceil(_Bits / log2(range))).
267
-
268
- _STL_INTERNAL_CHECK(0 <= _Bits && _Bits <= 64);
264
+ struct _Urbg_gen_can_params {
265
+ bool _Rx_is_pow2 = false; // $R$ (in N4981 [rand.util.canonical]/1.2) is a power of 2
266
+ int _Kx = 0; // $k$ in N4981 [rand.util.canonical]/1.4
267
+ uint64_t _Xx = 0; // $x$ in N4981 [rand.util.canonical]/1.5
268
+ float _Scale = 0.0f; // $r^{-d}$, always representable as a float
269
+ int _Smax_bits = 0; // number of bits in $R^k$
270
+ };
269
271
270
- if (_Bits == 0 || (_Gmax == UINT64_MAX && _Gmin == 0)) {
271
- return 1;
272
+ _NODISCARD constexpr _Urbg_gen_can_params _Generate_canonical_params(const size_t _Bits, const uint64_t _Rm1) {
273
+ const auto _Rx = _Unsigned128{_Rm1} + 1; // $R$ in N4981 [rand.util.canonical]/1.2
274
+ const auto _Target = _Unsigned128{1} << _Bits; // $r^d$
275
+ _Unsigned128 _Product = 1;
276
+ _Urbg_gen_can_params _Result;
277
+ _Result._Rx_is_pow2 = (_Rx & (_Rx - 1)) == 0;
278
+ _Result._Kx = 0;
279
+ while (_Product < _Target) {
280
+ _Product *= _Rx;
281
+ ++_Result._Kx;
272
282
}
273
283
274
- const auto _Range = (_Gmax - _Gmin) + 1;
275
- const auto _Target = ~uint64_t{0} >> (64 - _Bits);
276
- uint64_t _Prod = 1;
277
- int _Ceil = 0;
278
- while (_Prod <= _Target) {
279
- ++_Ceil;
280
- if (_Prod > UINT64_MAX / _Range) {
281
- break;
282
- }
283
-
284
- _Prod *= _Range;
284
+ _Result._Xx = static_cast<uint64_t>(_Product / _Target);
285
+ _Result._Scale = 1.0f / static_cast<float>(1ULL << _Bits);
286
+ _Result._Smax_bits = 0;
287
+ // $S \in [0, _Product)$
288
+ --_Product;
289
+ while (_Product > 0) {
290
+ ++_Result._Smax_bits;
291
+ _Product >>= 1;
285
292
}
286
293
287
- return _Ceil ;
294
+ return _Result ;
288
295
}
289
296
290
297
_EXPORT_STD template <class _Real, size_t _Bits, class _Gen>
@@ -294,27 +301,110 @@ _NODISCARD _Real generate_canonical(_Gen& _Gx) { // build a floating-point value
294
301
constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
295
302
constexpr auto _Minbits = static_cast<int>(_Digits < _Bits ? _Digits : _Bits);
296
303
297
- constexpr auto _Gxmin = static_cast<_Real>((_Gen::min)());
298
- constexpr auto _Gxmax = static_cast<_Real>((_Gen::max)());
299
- constexpr auto _Rx = (_Gxmax - _Gxmin) + _Real{1};
304
+ if constexpr (_Minbits == 0) {
305
+ return _Real{0};
306
+ } else {
307
+ using _Result_uint_type = conditional_t<_Minbits <= 32, uint32_t, uint64_t>;
308
+
309
+ constexpr auto _Gxmin = (_Gen::min)();
310
+ constexpr auto _Gxmax = (_Gen::max)();
311
+ constexpr auto _Params = _Generate_canonical_params(_Minbits, _Gxmax - _Gxmin);
312
+
313
+ using _Sx_type = conditional_t<_Params._Smax_bits <= 32, uint32_t,
314
+ conditional_t<_Params._Smax_bits <= 64, uint64_t, _Unsigned128>>;
315
+ _Sx_type _Sx;
316
+
317
+ if constexpr (_Params._Rx_is_pow2) {
318
+ // Always needs only one attempt. Multiplications can be replaced with shift/add. Optimize k=1 case.
319
+ if constexpr (_Params._Kx == 1) {
320
+ _Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
321
+ } else if constexpr (_Params._Kx > 1) {
322
+ constexpr int _Rx_bits = _Params._Smax_bits / _Params._Kx;
323
+ _Sx = 0;
324
+ int _Shift = 0;
325
+ for (int _Idx = 0; _Idx < _Params._Kx; ++_Idx) {
326
+ _Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) << _Shift;
327
+ _Shift += _Rx_bits;
328
+ }
329
+ } else {
330
+ _STL_INTERNAL_STATIC_ASSERT(false); // unexpected k
331
+ }
300
332
301
- constexpr int _Kx = _Generate_canonical_iterations(_Minbits, (_Gen::min)(), (_Gen::max)());
333
+ _Sx >>= _Params._Smax_bits - _Minbits;
334
+ return static_cast<_Real>(static_cast<_Result_uint_type>(_Sx)) * static_cast<_Real>(_Params._Scale);
335
+ } else {
336
+ constexpr auto _Rx = _Sx_type{_Gxmax - _Gxmin} + 1u;
337
+ constexpr _Sx_type _Limit = static_cast<_Sx_type>(_Params._Xx) * (_Sx_type{1} << _Minbits);
338
+
339
+ do {
340
+ // unroll first two iterations to avoid unnecessary multiplications
341
+ _Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
342
+ if constexpr (_Params._Kx == 2) {
343
+ _Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
344
+ } else if constexpr (_Params._Kx > 2) {
345
+ _Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
346
+ _Sx_type _Factor = _Rx;
347
+ for (int _Idx = 2; _Idx < _Params._Kx; ++_Idx) {
348
+ _Factor *= _Rx;
349
+ _Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Factor;
350
+ }
351
+ }
352
+ } while (_Sx >= _Limit);
302
353
303
- _Real _Ans{0};
304
- _Real _Factor{1};
354
+ return static_cast<_Real>(static_cast<_Result_uint_type>(_Sx / _Params._Xx))
355
+ * static_cast<_Real>(_Params._Scale);
356
+ }
357
+ }
358
+ }
305
359
306
- for (int _Idx = 0; _Idx < _Kx; ++_Idx) { // add in another set of bits
307
- _Ans += (static_cast<_Real>(_Gx()) - _Gxmin) * _Factor;
308
- _Factor *= _Rx;
360
+ template <class _Uint_type, class _Gen, class _Real>
361
+ bool _Nrand_for_tr1(
362
+ const uint64_t _Rd, const uint64_t _Rx, _Gen& _Gx, _Real& _Val, uint64_t& _Sx_init, uint64_t& _Factor_init) {
363
+ // Minimally-constexpr generate_canonical algorithm. Will save work and exit if _Factor would overflow.
364
+ _Uint_type _Sx = _Sx_init;
365
+ _Uint_type _Factor = _Factor_init;
366
+ const auto _Gxmin = (_Gx.min)();
367
+ const auto _Overflow_limit = UINT64_MAX / _Rx;
368
+
369
+ _Uint_type _Xx = 0;
370
+ _Uint_type _Limit = 0;
371
+ bool _Init = false;
372
+ for (;;) {
373
+ while (_Factor < _Rd) {
374
+ if constexpr (is_same_v<_Uint_type, uint64_t>) {
375
+ if (_Factor > _Overflow_limit) {
376
+ _Sx_init = _Sx;
377
+ _Factor_init = _Factor;
378
+ return true;
379
+ }
380
+ }
381
+
382
+ _Sx += (_Gx() - _Gxmin) * _Factor;
383
+ _Factor *= _Rx;
384
+ }
385
+
386
+ if (!_Init) {
387
+ _Init = true;
388
+ _Xx = _Factor / _Rd;
389
+ _Limit = _Xx * _Rd;
390
+ }
391
+
392
+ if (_Sx < _Limit) {
393
+ break;
394
+ } else {
395
+ _Sx = 0;
396
+ _Factor = 1;
397
+ }
309
398
}
310
399
311
- return _Ans / _Factor;
400
+ _Val = static_cast<_Real>(static_cast<uint64_t>(_Sx / _Xx));
401
+ return false;
312
402
}
313
403
314
404
template <class _Gen, class = void>
315
405
struct _Has_static_min_max : false_type {};
316
406
317
- // This checks a requirement of N4950 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
407
+ // This checks a requirement of N4981 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
318
408
// to implement the whole concept - we just need to distinguish Standard machinery from tr1 machinery.
319
409
template <class _Gen>
320
410
struct _Has_static_min_max<_Gen, void_t<decltype(bool_constant<(_Gen::min)() < (_Gen::max)()>::value)>> : true_type {};
@@ -323,18 +413,40 @@ template <class _Real, class _Gen>
323
413
_NODISCARD _Real _Nrand_impl(_Gen& _Gx) { // build a floating-point value from random sequence
324
414
_RNG_REQUIRE_REALTYPE(_Nrand_impl, _Real);
325
415
326
- constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
327
- constexpr auto _Bits = ~size_t{0};
328
- constexpr auto _Minbits = _Digits < _Bits ? _Digits : _Bits;
416
+ constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
417
+
418
+ if constexpr (_Has_static_min_max<_Gen>::value) {
419
+ return _STD generate_canonical<_Real, _Digits>(_Gx);
420
+ } else if constexpr (is_integral_v<typename _Gen::result_type>) {
421
+ // TRANSITION, for integral tr1 machinery only; Standard machinery can call generate_canonical directly
422
+ const auto _Gxmax = (_Gx.max)();
423
+ const auto _Gxmin = (_Gx.min)();
424
+ _Real _Val{0};
425
+
426
+ if (_Gxmax == UINT64_MAX && _Gxmin == 0) {
427
+ // special case when uint64_t can't hold the full URBG range
428
+ _Val = static_cast<_Real>(static_cast<uint64_t>(_Gx()) >> (64 - _Digits));
429
+ } else {
430
+ constexpr auto _Rd = 1ULL << _Digits;
431
+ const auto _Rx = static_cast<uint64_t>(_Gxmax - _Gxmin) + 1;
432
+ uint64_t _Sx = 0;
433
+ uint64_t _Factor = 1;
434
+
435
+ // Try with 64 bits first, upgrade to 128 if necessary.
436
+ const bool _Would_overflow = _Nrand_for_tr1<uint64_t>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
437
+ if (_Would_overflow) {
438
+ _Nrand_for_tr1<_Unsigned128>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
439
+ }
440
+ }
329
441
330
- if constexpr (_Has_static_min_max<_Gen>::value && _Minbits <= 64) {
331
- return _STD generate_canonical<_Real, _Minbits>(_Gx);
332
- } else { // TRANSITION, for tr1 machinery only; Standard machinery can call generate_canonical directly
442
+ return _STD ldexp(_Val, -static_cast<int>(_Digits));
443
+ } else {
444
+ // TRANSITION, the pre-P0952R2 algorithm, for non-integral tr1 machinery only (e.g. subtract_with_carry_01)
333
445
const _Real _Gxmin = static_cast<_Real>((_Gx.min)());
334
446
const _Real _Gxmax = static_cast<_Real>((_Gx.max)());
335
447
const _Real _Rx = (_Gxmax - _Gxmin) + _Real{1};
336
448
337
- const int _Ceil = static_cast<int>(_STD ceil(static_cast<_Real>(_Minbits ) / _STD log2(_Rx)));
449
+ const int _Ceil = static_cast<int>(_STD ceil(static_cast<_Real>(_Digits ) / _STD log2(_Rx)));
338
450
const int _Kx = _Ceil < 1 ? 1 : _Ceil;
339
451
340
452
_Real _Ans{0};
@@ -1968,8 +2080,8 @@ public:
1968
2080
explicit _Rng_from_urng_v2(_Urng& _Func) noexcept : _Ref(_Func) {}
1969
2081
1970
2082
_Diff operator()(_Diff _Index) { // adapt _Urng closed range to [0, _Index)
1971
- // From Daniel Lemire, "Fast Random Integer Generation in an Interval", ACM Trans. Model. Comput. Simul. 29 (1),
1972
- // 2019.
2083
+ // From Daniel Lemire, "Fast Random Integer Generation in an Interval",
2084
+ // ACM Trans. Model. Comput. Simul. 29 (1), 2019.
1973
2085
//
1974
2086
// Algorithm 5 <-> This Code:
1975
2087
// m <-> _Product
@@ -2925,8 +3037,8 @@ public:
2925
3037
2926
3038
private:
2927
3039
template <class _Engine>
2928
- result_type _Eval(
2929
- _Engine& _Eng, const param_type& _Par0) const { // Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
3040
+ result_type _Eval(_Engine& _Eng, const param_type& _Par0) const {
3041
+ // Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
2930
3042
_Ty _Res;
2931
3043
if (_Par0._Tx < 25) { // generate directly
2932
3044
_Res = 0;
0 commit comments