Skip to content

Commit f9b0af8

Browse files
Implement P0952R2: 'A new specification for std::generate_canonical' (#4740)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 14b54ca commit f9b0af8

File tree

8 files changed

+366
-107
lines changed

8 files changed

+366
-107
lines changed

stl/inc/random

Lines changed: 153 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -261,30 +261,37 @@ private:
261261
vector<result_type> _Myvec;
262262
};
263263

264-
_NODISCARD constexpr int _Generate_canonical_iterations(const int _Bits, const uint64_t _Gmin, const uint64_t _Gmax) {
265-
// For a URBG type `G` with range == `(G::max() - G::min()) + 1`, returns the number of calls to generate at least
266-
// _Bits bits of entropy. Specifically, max(1, ceil(_Bits / log2(range))).
267-
268-
_STL_INTERNAL_CHECK(0 <= _Bits && _Bits <= 64);
264+
struct _Urbg_gen_can_params {
265+
bool _Rx_is_pow2 = false; // $R$ (in N4981 [rand.util.canonical]/1.2) is a power of 2
266+
int _Kx = 0; // $k$ in N4981 [rand.util.canonical]/1.4
267+
uint64_t _Xx = 0; // $x$ in N4981 [rand.util.canonical]/1.5
268+
float _Scale = 0.0f; // $r^{-d}$, always representable as a float
269+
int _Smax_bits = 0; // number of bits in $R^k$
270+
};
269271

270-
if (_Bits == 0 || (_Gmax == UINT64_MAX && _Gmin == 0)) {
271-
return 1;
272+
_NODISCARD constexpr _Urbg_gen_can_params _Generate_canonical_params(const size_t _Bits, const uint64_t _Rm1) {
273+
const auto _Rx = _Unsigned128{_Rm1} + 1; // $R$ in N4981 [rand.util.canonical]/1.2
274+
const auto _Target = _Unsigned128{1} << _Bits; // $r^d$
275+
_Unsigned128 _Product = 1;
276+
_Urbg_gen_can_params _Result;
277+
_Result._Rx_is_pow2 = (_Rx & (_Rx - 1)) == 0;
278+
_Result._Kx = 0;
279+
while (_Product < _Target) {
280+
_Product *= _Rx;
281+
++_Result._Kx;
272282
}
273283

274-
const auto _Range = (_Gmax - _Gmin) + 1;
275-
const auto _Target = ~uint64_t{0} >> (64 - _Bits);
276-
uint64_t _Prod = 1;
277-
int _Ceil = 0;
278-
while (_Prod <= _Target) {
279-
++_Ceil;
280-
if (_Prod > UINT64_MAX / _Range) {
281-
break;
282-
}
283-
284-
_Prod *= _Range;
284+
_Result._Xx = static_cast<uint64_t>(_Product / _Target);
285+
_Result._Scale = 1.0f / static_cast<float>(1ULL << _Bits);
286+
_Result._Smax_bits = 0;
287+
// $S \in [0, _Product)$
288+
--_Product;
289+
while (_Product > 0) {
290+
++_Result._Smax_bits;
291+
_Product >>= 1;
285292
}
286293

287-
return _Ceil;
294+
return _Result;
288295
}
289296

290297
_EXPORT_STD template <class _Real, size_t _Bits, class _Gen>
@@ -294,27 +301,110 @@ _NODISCARD _Real generate_canonical(_Gen& _Gx) { // build a floating-point value
294301
constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
295302
constexpr auto _Minbits = static_cast<int>(_Digits < _Bits ? _Digits : _Bits);
296303

297-
constexpr auto _Gxmin = static_cast<_Real>((_Gen::min)());
298-
constexpr auto _Gxmax = static_cast<_Real>((_Gen::max)());
299-
constexpr auto _Rx = (_Gxmax - _Gxmin) + _Real{1};
304+
if constexpr (_Minbits == 0) {
305+
return _Real{0};
306+
} else {
307+
using _Result_uint_type = conditional_t<_Minbits <= 32, uint32_t, uint64_t>;
308+
309+
constexpr auto _Gxmin = (_Gen::min)();
310+
constexpr auto _Gxmax = (_Gen::max)();
311+
constexpr auto _Params = _Generate_canonical_params(_Minbits, _Gxmax - _Gxmin);
312+
313+
using _Sx_type = conditional_t<_Params._Smax_bits <= 32, uint32_t,
314+
conditional_t<_Params._Smax_bits <= 64, uint64_t, _Unsigned128>>;
315+
_Sx_type _Sx;
316+
317+
if constexpr (_Params._Rx_is_pow2) {
318+
// Always needs only one attempt. Multiplications can be replaced with shift/add. Optimize k=1 case.
319+
if constexpr (_Params._Kx == 1) {
320+
_Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
321+
} else if constexpr (_Params._Kx > 1) {
322+
constexpr int _Rx_bits = _Params._Smax_bits / _Params._Kx;
323+
_Sx = 0;
324+
int _Shift = 0;
325+
for (int _Idx = 0; _Idx < _Params._Kx; ++_Idx) {
326+
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) << _Shift;
327+
_Shift += _Rx_bits;
328+
}
329+
} else {
330+
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected k
331+
}
300332

301-
constexpr int _Kx = _Generate_canonical_iterations(_Minbits, (_Gen::min)(), (_Gen::max)());
333+
_Sx >>= _Params._Smax_bits - _Minbits;
334+
return static_cast<_Real>(static_cast<_Result_uint_type>(_Sx)) * static_cast<_Real>(_Params._Scale);
335+
} else {
336+
constexpr auto _Rx = _Sx_type{_Gxmax - _Gxmin} + 1u;
337+
constexpr _Sx_type _Limit = static_cast<_Sx_type>(_Params._Xx) * (_Sx_type{1} << _Minbits);
338+
339+
do {
340+
// unroll first two iterations to avoid unnecessary multiplications
341+
_Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
342+
if constexpr (_Params._Kx == 2) {
343+
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
344+
} else if constexpr (_Params._Kx > 2) {
345+
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
346+
_Sx_type _Factor = _Rx;
347+
for (int _Idx = 2; _Idx < _Params._Kx; ++_Idx) {
348+
_Factor *= _Rx;
349+
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Factor;
350+
}
351+
}
352+
} while (_Sx >= _Limit);
302353

303-
_Real _Ans{0};
304-
_Real _Factor{1};
354+
return static_cast<_Real>(static_cast<_Result_uint_type>(_Sx / _Params._Xx))
355+
* static_cast<_Real>(_Params._Scale);
356+
}
357+
}
358+
}
305359

306-
for (int _Idx = 0; _Idx < _Kx; ++_Idx) { // add in another set of bits
307-
_Ans += (static_cast<_Real>(_Gx()) - _Gxmin) * _Factor;
308-
_Factor *= _Rx;
360+
template <class _Uint_type, class _Gen, class _Real>
361+
bool _Nrand_for_tr1(
362+
const uint64_t _Rd, const uint64_t _Rx, _Gen& _Gx, _Real& _Val, uint64_t& _Sx_init, uint64_t& _Factor_init) {
363+
// Minimally-constexpr generate_canonical algorithm. Will save work and exit if _Factor would overflow.
364+
_Uint_type _Sx = _Sx_init;
365+
_Uint_type _Factor = _Factor_init;
366+
const auto _Gxmin = (_Gx.min)();
367+
const auto _Overflow_limit = UINT64_MAX / _Rx;
368+
369+
_Uint_type _Xx = 0;
370+
_Uint_type _Limit = 0;
371+
bool _Init = false;
372+
for (;;) {
373+
while (_Factor < _Rd) {
374+
if constexpr (is_same_v<_Uint_type, uint64_t>) {
375+
if (_Factor > _Overflow_limit) {
376+
_Sx_init = _Sx;
377+
_Factor_init = _Factor;
378+
return true;
379+
}
380+
}
381+
382+
_Sx += (_Gx() - _Gxmin) * _Factor;
383+
_Factor *= _Rx;
384+
}
385+
386+
if (!_Init) {
387+
_Init = true;
388+
_Xx = _Factor / _Rd;
389+
_Limit = _Xx * _Rd;
390+
}
391+
392+
if (_Sx < _Limit) {
393+
break;
394+
} else {
395+
_Sx = 0;
396+
_Factor = 1;
397+
}
309398
}
310399

311-
return _Ans / _Factor;
400+
_Val = static_cast<_Real>(static_cast<uint64_t>(_Sx / _Xx));
401+
return false;
312402
}
313403

314404
template <class _Gen, class = void>
315405
struct _Has_static_min_max : false_type {};
316406

317-
// This checks a requirement of N4950 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
407+
// This checks a requirement of N4981 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
318408
// to implement the whole concept - we just need to distinguish Standard machinery from tr1 machinery.
319409
template <class _Gen>
320410
struct _Has_static_min_max<_Gen, void_t<decltype(bool_constant<(_Gen::min)() < (_Gen::max)()>::value)>> : true_type {};
@@ -323,18 +413,40 @@ template <class _Real, class _Gen>
323413
_NODISCARD _Real _Nrand_impl(_Gen& _Gx) { // build a floating-point value from random sequence
324414
_RNG_REQUIRE_REALTYPE(_Nrand_impl, _Real);
325415

326-
constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
327-
constexpr auto _Bits = ~size_t{0};
328-
constexpr auto _Minbits = _Digits < _Bits ? _Digits : _Bits;
416+
constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
417+
418+
if constexpr (_Has_static_min_max<_Gen>::value) {
419+
return _STD generate_canonical<_Real, _Digits>(_Gx);
420+
} else if constexpr (is_integral_v<typename _Gen::result_type>) {
421+
// TRANSITION, for integral tr1 machinery only; Standard machinery can call generate_canonical directly
422+
const auto _Gxmax = (_Gx.max)();
423+
const auto _Gxmin = (_Gx.min)();
424+
_Real _Val{0};
425+
426+
if (_Gxmax == UINT64_MAX && _Gxmin == 0) {
427+
// special case when uint64_t can't hold the full URBG range
428+
_Val = static_cast<_Real>(static_cast<uint64_t>(_Gx()) >> (64 - _Digits));
429+
} else {
430+
constexpr auto _Rd = 1ULL << _Digits;
431+
const auto _Rx = static_cast<uint64_t>(_Gxmax - _Gxmin) + 1;
432+
uint64_t _Sx = 0;
433+
uint64_t _Factor = 1;
434+
435+
// Try with 64 bits first, upgrade to 128 if necessary.
436+
const bool _Would_overflow = _Nrand_for_tr1<uint64_t>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
437+
if (_Would_overflow) {
438+
_Nrand_for_tr1<_Unsigned128>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
439+
}
440+
}
329441

330-
if constexpr (_Has_static_min_max<_Gen>::value && _Minbits <= 64) {
331-
return _STD generate_canonical<_Real, _Minbits>(_Gx);
332-
} else { // TRANSITION, for tr1 machinery only; Standard machinery can call generate_canonical directly
442+
return _STD ldexp(_Val, -static_cast<int>(_Digits));
443+
} else {
444+
// TRANSITION, the pre-P0952R2 algorithm, for non-integral tr1 machinery only (e.g. subtract_with_carry_01)
333445
const _Real _Gxmin = static_cast<_Real>((_Gx.min)());
334446
const _Real _Gxmax = static_cast<_Real>((_Gx.max)());
335447
const _Real _Rx = (_Gxmax - _Gxmin) + _Real{1};
336448

337-
const int _Ceil = static_cast<int>(_STD ceil(static_cast<_Real>(_Minbits) / _STD log2(_Rx)));
449+
const int _Ceil = static_cast<int>(_STD ceil(static_cast<_Real>(_Digits) / _STD log2(_Rx)));
338450
const int _Kx = _Ceil < 1 ? 1 : _Ceil;
339451

340452
_Real _Ans{0};
@@ -1968,8 +2080,8 @@ public:
19682080
explicit _Rng_from_urng_v2(_Urng& _Func) noexcept : _Ref(_Func) {}
19692081

19702082
_Diff operator()(_Diff _Index) { // adapt _Urng closed range to [0, _Index)
1971-
// From Daniel Lemire, "Fast Random Integer Generation in an Interval", ACM Trans. Model. Comput. Simul. 29 (1),
1972-
// 2019.
2083+
// From Daniel Lemire, "Fast Random Integer Generation in an Interval",
2084+
// ACM Trans. Model. Comput. Simul. 29 (1), 2019.
19732085
//
19742086
// Algorithm 5 <-> This Code:
19752087
// m <-> _Product
@@ -2925,8 +3037,8 @@ public:
29253037

29263038
private:
29273039
template <class _Engine>
2928-
result_type _Eval(
2929-
_Engine& _Eng, const param_type& _Par0) const { // Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
3040+
result_type _Eval(_Engine& _Eng, const param_type& _Par0) const {
3041+
// Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
29303042
_Ty _Res;
29313043
if (_Par0._Tx < 25) { // generate directly
29323044
_Res = 0;

stl/inc/yvals_core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
// P0883R2 Fixing Atomic Initialization
6363
// P0935R0 Eradicating Unnecessarily Explicit Default Constructors
6464
// P0941R2 Feature-Test Macros
65+
// P0952R2 A New Specification For generate_canonical()
6566
// P0972R0 noexcept For <chrono> zero(), min(), max()
6667
// P1065R2 constexpr INVOKE
6768
// (the std::invoke function only; other components like bind and reference_wrapper are C++20 only)

tests/libcxx/expected_results.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cp
7373
# Bogus test believes that copyability of array<T, 0> must be the same as array<T, 1>
7474
std/containers/sequences/array/array.cons/implicit_copy.pass.cpp FAIL
7575

76+
# libc++ hasn't implemented P0952R2, which changes the generate_canonical algorithm.
77+
std/numerics/rand/rand.util/rand.util.canonical/generate_canonical.pass.cpp FAIL
78+
7679
# Test expects __cpp_lib_chrono to have the old value 201611L for P0505R0; we define the C++20 value 201907L for P1466R3.
7780
std/language.support/support.limits/support.limits.general/chrono.version.compile.pass.cpp FAIL
7881

tests/std/test.lst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,6 @@ tests\GH_001850_clog_tied_to_cout
198198
tests\GH_001858_iostream_exception
199199
tests\GH_001912_random_distribution_operator_const
200200
tests\GH_001914_cached_position
201-
tests\GH_001964_constexpr_generate_canonical
202201
tests\GH_002030_asan_annotate_string
203202
tests\GH_002030_asan_annotate_vector
204203
tests\GH_002039_byte_is_not_trivially_swappable
@@ -512,6 +511,7 @@ tests\P0898R3_identity
512511
tests\P0912R5_coroutine
513512
tests\P0919R3_heterogeneous_unordered_lookup
514513
tests\P0943R6_stdatomic_h
514+
tests\P0952R2_new_generate_canonical
515515
tests\P0966R1_string_reserve_should_not_shrink
516516
tests\P0980R1_constexpr_strings
517517
tests\P1004R2_constexpr_vector
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) Microsoft Corporation.
22
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
33

4-
RUNALL_INCLUDE ..\usual_matrix.lst
4+
# Skip /clr:pure configurations; they pass but take an extremely long time to run, risking test timeouts.
5+
RUNALL_INCLUDE ..\impure_matrix.lst

tests/std/tests/GH_001964_constexpr_generate_canonical/test.cpp

Lines changed: 0 additions & 64 deletions
This file was deleted.
File renamed without changes.

0 commit comments

Comments
 (0)