|
56 | 56 | //! Under this scheme, the query `Low_Line` will find `U+005F LOW LINE`, as well as `l o w L-I-N-E`,
|
57 | 57 | //! `lowline`, and `low\nL-I-N-E`, but not `low- line`.
|
58 | 58 | //! Similarly, `tibetan letter -a` will find `U+0F60 TIBETAN LETTER -A`, as well as
|
59 |
| -//! `tibetanletter - a` and `TIBETAN L_ETTE_R- __a__`, but not `tibetan letter-a` or `TIBETAN LETTER A`. |
| 59 | +//! `tibetanletter - a` and `TIBETAN L_ETTE_R- __a__`, but not `tibetan letter-a` or |
| 60 | +//! `TIBETAN LETTER A`. |
60 | 61 | //!
|
61 | 62 | //! In the implementation of this crate, 'whitespace' is determined by the [`is_ascii_whitespace`]
|
62 | 63 | //! method on `u8` and `char`. See its documentation for more info.
|
@@ -124,11 +125,17 @@ fn is_cjk_unified_ideograph(ch: char) -> bool {
|
124 | 125 | .any(|&(lo, hi)| lo <= ch && ch <= hi)
|
125 | 126 | }
|
126 | 127 |
|
127 |
| -/// An iterator over the components of a code point's name, it also |
128 |
| -/// implements `Show`. |
| 128 | +/// An iterator over the components of a code point's name. It notably implements `Display`. |
129 | 129 | ///
|
130 |
| -/// The size hint is exact for the number of pieces, but iterates |
131 |
| -/// (although iteration is cheap and all names are short). |
| 130 | +/// To reconstruct the full Unicode name from this iterator, you can concatenate every string slice |
| 131 | +/// yielded from it. Each such slice is either a word matching `[A-Z0-9]*`, a space `" "`, or a |
| 132 | +/// hyphen `"-"`. (In particular, words can be the empty string `""`.) |
| 133 | +/// |
| 134 | +/// The [size hint] returns an exact size, by cloning the iterator and iterating it fully. |
| 135 | +/// Cloning and iteration are cheap, and all names are relatively short, so this should not have a |
| 136 | +/// high impact. |
| 137 | +/// |
| 138 | +/// [size hint]: std::iter::Iterator::size_hint |
132 | 139 | #[derive(Clone)]
|
133 | 140 | pub struct Name {
|
134 | 141 | data: Name_,
|
@@ -182,6 +189,7 @@ impl Name {
|
182 | 189 |
|
183 | 190 | impl Iterator for Name {
|
184 | 191 | type Item = &'static str;
|
| 192 | + |
185 | 193 | fn next(&mut self) -> Option<&'static str> {
|
186 | 194 | match self.data {
|
187 | 195 | Name_::Plain(ref mut s) => s.next(),
|
@@ -246,20 +254,16 @@ impl fmt::Display for Name {
|
246 | 254 |
|
247 | 255 | /// Find the name of `c`, or `None` if `c` has no name.
|
248 | 256 | ///
|
249 |
| -/// The return value is an iterator that yields `&str` components of |
250 |
| -/// the name successively (including spaces and hyphens). It |
251 |
| -/// implements `Show`, and thus can be used naturally to build |
252 |
| -/// `String`s, or be printed, etc. |
| 257 | +/// The return value is an iterator that yields `&'static str` components of the name successively |
| 258 | +/// (including spaces and hyphens). It implements `Display`, so it can be used naturally to build |
| 259 | +/// `String`s or be printed. See also the [type-level docs][Name]. |
253 | 260 | ///
|
254 | 261 | /// # Example
|
255 | 262 | ///
|
256 | 263 | /// ```rust
|
257 |
| -/// assert_eq!(unicode_names2::name('a').map(|n| n.to_string()), |
258 |
| -/// Some("LATIN SMALL LETTER A".to_string())); |
259 |
| -/// assert_eq!(unicode_names2::name('\u{2605}').map(|n| n.to_string()), |
260 |
| -/// Some("BLACK STAR".to_string())); |
261 |
| -/// assert_eq!(unicode_names2::name('☃').map(|n| n.to_string()), |
262 |
| -/// Some("SNOWMAN".to_string())); |
| 264 | +/// assert_eq!(unicode_names2::name('a').unwrap().to_string(), "LATIN SMALL LETTER A"); |
| 265 | +/// assert_eq!(unicode_names2::name('\u{2605}').unwrap().to_string(), "BLACK STAR"); |
| 266 | +/// assert_eq!(unicode_names2::name('☃').unwrap().to_string(), "SNOWMAN"); |
263 | 267 | ///
|
264 | 268 | /// // control code
|
265 | 269 | /// assert!(unicode_names2::name('\x00').is_none());
|
@@ -342,8 +346,8 @@ fn character_by_alias(name: &[u8]) -> Option<char> {
|
342 | 346 | /// Find the character called `name`, or `None` if no such character
|
343 | 347 | /// exists.
|
344 | 348 | ///
|
345 |
| -/// This function uses the [UAX44-LM2] loose matching scheme for lookup. For more information, see the |
346 |
| -/// [crate-level docs][self]. |
| 349 | +/// This function uses the [UAX44-LM2] loose matching scheme for lookup. For more information, see |
| 350 | +/// the [crate-level docs][self]. |
347 | 351 | ///
|
348 | 352 | /// [UAX44-LM2]: https://www.unicode.org/reports/tr44/tr44-34.html#UAX44-LM2
|
349 | 353 | ///
|
@@ -471,7 +475,7 @@ pub fn character(search_name: &str) -> Option<char> {
|
471 | 475 | }
|
472 | 476 |
|
473 | 477 | /// Convert a Unicode name to a form that can be used for loose matching, as per
|
474 |
| -/// [UAX#44](https://www.unicode.org/reports/tr44/tr44-34.html#Matching_Names) |
| 478 | +/// [UAX#44](https://www.unicode.org/reports/tr44/tr44-34.html#Matching_Names). |
475 | 479 | ///
|
476 | 480 | /// This function matches `unicode_names2_generator::normalise_name` in implementation, except that
|
477 | 481 | /// the special case of U+1180 HANGUL JUNGSEONG O-E isn't handled here, because we don't yet know
|
|
0 commit comments