From ad1e088014d7fa54aee4127eaf49f0c42dc839d0 Mon Sep 17 00:00:00 2001 From: Dawn Perchik Date: Tue, 27 Jun 2023 14:21:57 -0700 Subject: [PATCH 1/3] P1885R12 Naming Text Encodings to Demystify Them Editorial notes: - [text.*] Minor changes were made for consistency with existing text. - [text.encoding.overview] Fix reference for "trivially copyable type". - [text.encoding.hash] Make section match synopsis; remove stray declaration. --- source/back.tex | 8 + source/lib-intro.tex | 7 +- source/locales.tex | 735 ++++++++++++++++++++++++++++++++++++++++++- source/support.tex | 1 + 4 files changed, 747 insertions(+), 4 deletions(-) diff --git a/source/back.tex b/source/back.tex index 055fb32146..c11c841111 100644 --- a/source/back.tex +++ b/source/back.tex @@ -18,6 +18,14 @@ \chapter{Bibliography} Programming languages, their environments, and system software interfaces --- Floating-point extensions for C --- Part 3: Interchange and extended types} % Other international standards. +\item + IANA Character Sets Database. + Available from:\newline + \url{https://www.iana.org/assignments/character-sets/}, 2021-04-01 +\item + Unicode Character Mapping Markup Language [online]. + Edited by Mark Davis and Markus Scherer. Revision 5.0.1; 2017-05-31 + Available from: \url{http://www.unicode.org/reports/tr22/tr22-8.html} \item IANA Time Zone Database. 
Available from: \url{https://www.iana.org/time-zones} diff --git a/source/lib-intro.tex b/source/lib-intro.tex index b774ca2f1a..4b9e59445a 100644 --- a/source/lib-intro.tex +++ b/source/lib-intro.tex @@ -1093,8 +1093,8 @@ \tcode{} \\ \tcode{} \\ \tcode{} \\ -\columnbreak \tcode{} \\ +\columnbreak \tcode{} \\ \tcode{} \\ \tcode{} \\ @@ -1116,8 +1116,8 @@ \tcode{} \\ \tcode{} \\ \tcode{} \\ -\columnbreak \tcode{} \\ +\columnbreak \tcode{} \\ \tcode{} \\ \tcode{} \\ @@ -1140,8 +1140,8 @@ \tcode{} \\ \tcode{} \\ \tcode{} \\ -\columnbreak \tcode{} \\ +\columnbreak \tcode{} \\ \tcode{} \\ \tcode{} \\ @@ -1150,6 +1150,7 @@ \tcode{} \\ \tcode{} \\ \tcode{} \\ +\tcode{} \\ \tcode{} \\ \tcode{} \\ \tcode{} \\ diff --git a/source/locales.tex b/source/locales.tex index 4b6e0376e0..f9b9efa791 100644 --- a/source/locales.tex +++ b/source/locales.tex @@ -22,7 +22,8 @@ \begin{libsumtab}{Localization library summary}{localization.summary} \ref{locales} & Locales & \tcode{} \\ \ref{locale.categories} & Standard \tcode{locale} categories & \\ \rowsep -\ref{c.locales} & C library locales & \tcode{} \\ +\ref{c.locales} & C library locales & \tcode{} \\ \rowsep +\ref{text.encoding} & Text encodings identification & \tcode{} \\ \end{libsumtab} \rSec1[locale.syn]{Header \tcode{} synopsis} @@ -148,6 +149,7 @@ // locale operations string name() const; + text_encoding encoding() const; bool operator==(const locale& other) const; @@ -691,6 +693,23 @@ otherwise, the string \tcode{"*"}. \end{itemdescr} +\indexlibrarymember{locale}{encoding}% +\begin{itemdecl} +text_encoding encoding() const; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{CHAR_BIT == 8} is \tcode{true}. + +\pnum +\returns +A \tcode{text_encoding} object representing +the implementation-defined encoding scheme +associated with the locale \tcode{*this}. 
+\end{itemdescr} + \rSec3[locale.operators]{Operators} \indexlibrarymember{locale}{operator==}% @@ -4632,3 +4651,717 @@ \tcode{strxfrm} & \tcode{wctomb} \\ \end{floattable} + +\rSec1[text.encoding]{Text encodings identification} + +\rSec2[text.encoding.syn]{Header \tcode{<text_encoding>} synopsis} + +\indexheader{text_encoding}% +\begin{codeblock} +namespace std { + struct text_encoding; + + // \ref{text.encoding.hash}, hash support + template<class T> struct hash; + template<> struct hash<text_encoding>; +} +\end{codeblock} + +\rSec2[text.encoding.class]{Class \tcode{text_encoding}} + +\rSec3[text.encoding.overview]{Overview} + +\pnum +The class \tcode{text_encoding} describes an interface +for accessing the IANA Character Sets registry. + +\indexlibraryglobal{text_encoding}% +\begin{codeblock} +namespace std { + struct text_encoding { + static constexpr size_t max_name_length = 63; + + // \ref{text.encoding.id}, enumeration \tcode{text_encoding::id} + enum class id : int_least32_t { + @\seebelow@ + }; + using enum id; + + constexpr text_encoding() = default; + constexpr explicit text_encoding(string_view enc) noexcept; + constexpr text_encoding(id i) noexcept; + + constexpr id mib() const noexcept; + constexpr const char* name() const noexcept; + + struct aliases_view; + constexpr aliases_view aliases() const noexcept; + + friend constexpr bool operator==(const text_encoding& encoding, + const text_encoding& other) noexcept; + friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept; + + static consteval text_encoding literal() noexcept; + static text_encoding environment(); + template<id i> static bool environment_is(); + + private: + id @\exposid{mib_}@ = id::unknown; // \expos + char @\exposid{name_}@[max_name_length + 1] = {0}; // \expos + static constexpr bool @\exposidnc{comp-name}@(string_view a, string_view b); // \expos + }; +} +\end{codeblock} + +\pnum +Class \tcode{text_encoding} is +a trivially copyable type\iref{term.trivially.copyable.type}. 
+ +\rSec3[text.encoding.general]{General} + +\pnum +A \defnadj{registered character}{encoding} is +a character encoding scheme in the IANA Character Sets registry. +\begin{note} +The IANA Character Sets registry uses the term ``character sets'' +to refer to character encodings. +\end{note} +The primary name of a registered character encoding is +the name of that encoding specified in the IANA Character Sets registry. + +\pnum +The set of known registered character encodings contains +every registered character encoding +specified in the IANA Character Sets registry except for the following: +\begin{itemize} +\item NATS-DANO (33) +\item NATS-DANO-ADD (34) +\end{itemize} + +\pnum +Each known registered character encoding +is identified by an enumerator in \tcode{text_encoding::id}, and +has a set of zero or more \defnx{aliases}{encoding!registered character!alias}. + +\pnum +The set of aliases of a known registered character encoding is an +\impldef{set of aliases of a known registered character encoding} +superset of the aliases specified in the IANA Character Sets registry. +No two aliases or primary names of distinct registered character encodings +are equivalent when compared by \tcode{text_encoding::\exposid{comp-name}}. + +\pnum +How a \tcode{text_encoding} object +is determined to be representative of a character encoding scheme +implemented in the translation or execution environment is +\impldef{how \tcode{text_encoding} objects are +determined to be representative of a character encoding scheme}. + +\pnum +An object \tcode{e} of type \tcode{text_encoding} such that +\tcode{e.mib() == text_encoding::id::unknown} is \tcode{false} and +\tcode{e.mib() == text_encoding::id::other} is \tcode{false} +maintains the following invariants: +\begin{itemize} +\item \tcode{e.name() == nullptr} is \tcode{false}, and +\item \tcode{e.mib() == text_encoding(e.name()).mib()} is \tcode{true}. 
+\end{itemize} + +\pnum +\recommended +\begin{itemize} +\item +Implementations should not consider registered encodings to be interchangeable. +\begin{example} +Shift_JIS and Windows-31J denote different encodings. +\end{example} +\item +Implementations should not use the name of a registered encoding +to describe another similar yet different non-registered encoding +unless there is a precedent on that implementation. +\begin{example} +Big5 +\end{example} +\end{itemize} + +\rSec3[text.encoding.members]{Members} + +\indexlibraryctor{text_encoding}% +\begin{itemdecl} +constexpr explicit text_encoding(string_view enc) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\expects +\begin{itemize} +\item +\tcode{enc} represents a string in the ordinary literal encoding +consisting only of elements of the basic character set\iref{lex.charset}. +\item +\tcode{enc.size() <= max_name_length} is \tcode{true}. +\item +\tcode{enc.contains('\textbackslash 0')} is \tcode{false}. +\end{itemize} + +\pnum +\ensures +\begin{itemize} +\item +If there exists a primary name or alias \tcode{a} +of a known registered character encoding such that +\tcode{\exposid{comp-name}(a, enc)} is \tcode{true}, +\exposid{mib_} has the value of the enumerator of \tcode{id} +associated with that registered character encoding. +Otherwise, \tcode{\exposid{mib_} == id::other} is \tcode{true}. +\item +\tcode{enc.compare(\exposid{name_}) == 0} is \tcode{true}. +\end{itemize} +\end{itemdescr} + +\indexlibraryctor{text_encoding}% +\begin{itemdecl} +constexpr text_encoding(id i) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\expects +\tcode{i} has the value of one of the enumerators of \tcode{id}. + +\pnum +\ensures +\begin{itemize} +\item +\tcode{\exposid{mib_} == i} is \tcode{true}. +\item +If \tcode{(\exposid{mib_} == id::unknown || \exposid{mib_} == id::other)} +is \tcode{true}, +\tcode{strlen(\exposid{name_}) == 0} is \tcode{true}. 
+Otherwise, +\tcode{ranges::contains(aliases(), string_view(\exposid{name_}))} +is \tcode{true}. +\end{itemize} +\end{itemdescr} + +\indexlibrarymember{mib}{text_encoding}% +\begin{itemdecl} +constexpr id mib() const noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{\exposid{mib_}}. +\end{itemdescr} + +\indexlibrarymember{name}{text_encoding}% +\begin{itemdecl} +constexpr const char* name() const noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{\exposid{name_}} if \tcode{(\exposid{name_}[0] != '\textbackslash 0')} +is \tcode{true}, and +\keyword{nullptr} otherwise. + +\pnum +\remarks +If \tcode{name() == nullptr} is \tcode{false}, +\tcode{name()} is an \ntbs{} and +accessing elements of \tcode{\exposid{name_}} +outside of the range \countedrange{name()}{strlen(name()) + 1} +is undefined behavior. +\end{itemdescr} + +\indexlibrarymember{aliases}{text_encoding}% +\begin{itemdecl} +constexpr aliases_view aliases() const noexcept; +\end{itemdecl} + +\begin{itemdescr} +Let \tcode{r} denote an instance of \tcode{aliases_view}. +If \tcode{*this} represents a known registered character encoding, then: +\begin{itemize} +\item +\tcode{r.front()} is the primary name of the registered character encoding, +\item +\tcode{r} contains the aliases of the registered character encoding, and +\item +\tcode{r} does not contain duplicate values when compared with \tcode{strcmp}. +\end{itemize} +Otherwise, \tcode{r} is an empty range. + +\pnum +Each element in \tcode{r} +is a non-null, non-empty \ntbs{} encoded in the literal character encoding and +comprising only characters from the basic character set. + +\pnum +\returns +\tcode{r}. + +\pnum +\begin{note} +The order of aliases in \tcode{r} is unspecified. 
+\end{note} +\end{itemdescr} + +\indexlibrarymember{literal}{text_encoding}% +\begin{itemdecl} +static consteval text_encoding literal() noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{CHAR_BIT == 8} is \tcode{true}. + +\pnum +\returns +A \tcode{text_encoding} object representing +the ordinary character literal encoding\iref{lex.charset}. +\end{itemdescr} + +\indexlibrarymember{environment}{text_encoding}% +\begin{itemdecl} +static text_encoding environment(); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{CHAR_BIT == 8} is \tcode{true}. + +\pnum +\returns +A \tcode{text_encoding} object representing +the \impldef{character encoding scheme of the environment} +character encoding scheme of the environment. +On a POSIX implementation, this is the encoding scheme associated with +the POSIX locale denoted by the empty string \tcode{""}. + +\pnum +\begin{note} +This function is not affected by calls to \tcode{setlocale}. +\end{note} + +\pnum +\recommended +Implementations should return a value that is not affected by calls to +the POSIX function \tcode{setenv} and +other functions which can modify the environment\iref{support.runtime}. +\end{itemdescr} + +\indexlibrarymember{environment_is}{text_encoding}% +\begin{itemdecl} +template<id i> + static bool environment_is(); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{CHAR_BIT == 8} is \tcode{true}. + +\pnum +\returns +\tcode{environment() == i}. 
+\end{itemdescr} + +\indexlibrarymember{\exposid{comp-name}}{text_encoding}% +\begin{itemdecl} +static constexpr bool @\exposid{comp-name}@(string_view a, string_view b); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{true} if the two strings \tcode{a} and \tcode{b} +encoded in the ordinary literal encoding +are equal, ignoring, from left-to-right, +\begin{itemize} +\item +all elements that are not digits or letters\iref{character.seq.general}, +\item +character case, and +\item +any sequence of one or more \tcode{0} characters +not immediately preceded by a numeric prefix, where +a numeric prefix is a sequence consisting of +a digit in the range \crange{1}{9} +optionally followed by one or more elements which are not digits or letters, +\end{itemize} +and \tcode{false} otherwise. + +\begin{note} +This comparison is identical to +the ``Charset Alias Matching'' algorithm +described in the Unicode Technical Standard 22. +\end{note} + +\begin{example} +\begin{codeblock} +assert(@\exposid{comp-name}@("UTF-8", "utf8") == true); +assert(@\exposid{comp-name}@("u.t.f-008", "utf8") == true); +assert(@\exposid{comp-name}@("ut8", "utf8") == false); +assert(@\exposid{comp-name}@("utf-80", "utf8") == false); +\end{codeblock} +\end{example} +\end{itemdescr} + +\rSec3[text.encoding.cmp]{Comparison functions} + +\indexlibrarymember{operator==}{text_encoding}% +\begin{itemdecl} +friend constexpr bool operator==(const text_encoding& a, const text_encoding& b) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +If \tcode{a.\exposid{mib_} == id::other \&\& b.\exposid{mib_} == id::other} +is \tcode{true}, +then \tcode{\exposid{comp-name}(a.\exposid{name_},\linebreak{}b.\exposid{name_})}. +Otherwise, \tcode{a.\exposid{mib_} == b.\exposid{mib_}}. 
+\end{itemdescr} + +\indexlibrarymember{operator==}{text_encoding}% +\begin{itemdecl} +friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{encoding.\exposid{mib_} == i}. + +\pnum +\remarks +This operator induces an equivalence relation on its arguments +if and only if \tcode{i != id::other} is \tcode{true}. +\end{itemdescr} + +\rSec3[text.encoding.aliases]{Class \tcode{text_encoding::aliases_view}} + +\indexlibrarymember{aliases_view}{text_encoding}% +\indexlibrarymember{begin}{text_encoding::aliases_view}% +\indexlibrarymember{end}{text_encoding::aliases_view}% +\begin{itemdecl} +struct text_encoding::aliases_view : ranges::view_interface<text_encoding::aliases_view> { + constexpr @\impdefx{type of \tcode{text_encoding::aliases_view::begin()}}@ begin() const; + constexpr @\impdefx{type of \tcode{text_encoding::aliases_view::end()}}@ end() const; +}; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\tcode{text_encoding::aliases_view} models +\libconcept{copyable}, +\tcode{ranges::\libconcept{view}}, +\tcode{ranges::\libconcept{random_access_range}}, and +\tcode{ranges::\libconcept{borrowed_range}}. + +\pnum +Both +\tcode{ranges::range_value_t<text_encoding::aliases_view>} and +\tcode{ranges::range_reference_t<text_encoding::aliases_view>} +denote \tcode{const char*}. 
+\end{itemdescr} + +\rSec3[text.encoding.id]{Enumeration \tcode{text_encoding::id}} + +\indexlibrarymember{id}{text_encoding}% +\begin{codeblock} +namespace std { + enum class text_encoding::id : int_least32_t { + other = 1, + unknown = 2, + ASCII = 3, + ISOLatin1 = 4, + ISOLatin2 = 5, + ISOLatin3 = 6, + ISOLatin4 = 7, + ISOLatinCyrillic = 8, + ISOLatinArabic = 9, + ISOLatinGreek = 10, + ISOLatinHebrew = 11, + ISOLatin5 = 12, + ISOLatin6 = 13, + ISOTextComm = 14, + HalfWidthKatakana = 15, + JISEncoding = 16, + ShiftJIS = 17, + EUCPkdFmtJapanese = 18, + EUCFixWidJapanese = 19, + ISO4UnitedKingdom = 20, + ISO11SwedishForNames = 21, + ISO15Italian = 22, + ISO17Spanish = 23, + ISO21German = 24, + ISO60DanishNorwegian = 25, + ISO69French = 26, + ISO10646UTF1 = 27, + ISO646basic1983 = 28, + INVARIANT = 29, + ISO2IntlRefVersion = 30, + NATSSEFI = 31, + NATSSEFIADD = 32, + ISO10Swedish = 35, + KSC56011987 = 36, + ISO2022KR = 37, + EUCKR = 38, + ISO2022JP = 39, + ISO2022JP2 = 40, + ISO13JISC6220jp = 41, + ISO14JISC6220ro = 42, + ISO16Portuguese = 43, + ISO18Greek7Old = 44, + ISO19LatinGreek = 45, + ISO25French = 46, + ISO27LatinGreek1 = 47, + ISO5427Cyrillic = 48, + ISO42JISC62261978 = 49, + ISO47BSViewdata = 50, + ISO49INIS = 51, + ISO50INIS8 = 52, + ISO51INISCyrillic = 53, + ISO54271981 = 54, + ISO5428Greek = 55, + ISO57GB1988 = 56, + ISO58GB231280 = 57, + ISO61Norwegian2 = 58, + ISO70VideotexSupp1 = 59, + ISO84Portuguese2 = 60, + ISO85Spanish2 = 61, + ISO86Hungarian = 62, + ISO87JISX0208 = 63, + ISO88Greek7 = 64, + ISO89ASMO449 = 65, + ISO90 = 66, + ISO91JISC62291984a = 67, + ISO92JISC62991984b = 68, + ISO93JIS62291984badd = 69, + ISO94JIS62291984hand = 70, + ISO95JIS62291984handadd = 71, + ISO96JISC62291984kana = 72, + ISO2033 = 73, + ISO99NAPLPS = 74, + ISO102T617bit = 75, + ISO103T618bit = 76, + ISO111ECMACyrillic = 77, + ISO121Canadian1 = 78, + ISO122Canadian2 = 79, + ISO123CSAZ24341985gr = 80, + ISO88596E = 81, + ISO88596I = 82, + ISO128T101G2 = 83, + ISO88598E = 
84, + ISO88598I = 85, + ISO139CSN369103 = 86, + ISO141JUSIB1002 = 87, + ISO143IECP271 = 88, + ISO146Serbian = 89, + ISO147Macedonian = 90, + ISO150 = 91, + ISO151Cuba = 92, + ISO6937Add = 93, + ISO153GOST1976874 = 94, + ISO8859Supp = 95, + ISO10367Box = 96, + ISO158Lap = 97, + ISO159JISX02121990 = 98, + ISO646Danish = 99, + USDK = 100, + DKUS = 101, + KSC5636 = 102, + Unicode11UTF7 = 103, + ISO2022CN = 104, + ISO2022CNEXT = 105, + UTF8 = 106, + ISO885913 = 109, + ISO885914 = 110, + ISO885915 = 111, + ISO885916 = 112, + GBK = 113, + GB18030 = 114, + OSDEBCDICDF0415 = 115, + OSDEBCDICDF03IRV = 116, + OSDEBCDICDF041 = 117, + ISO115481 = 118, + KZ1048 = 119, + UCS2 = 1000, + UCS4 = 1001, + UnicodeASCII = 1002, + UnicodeLatin1 = 1003, + UnicodeJapanese = 1004, + UnicodeIBM1261 = 1005, + UnicodeIBM1268 = 1006, + UnicodeIBM1276 = 1007, + UnicodeIBM1264 = 1008, + UnicodeIBM1265 = 1009, + Unicode11 = 1010, + SCSU = 1011, + UTF7 = 1012, + UTF16BE = 1013, + UTF16LE = 1014, + UTF16 = 1015, + CESU8 = 1016, + UTF32 = 1017, + UTF32BE = 1018, + UTF32LE = 1019, + BOCU1 = 1020, + UTF7IMAP = 1021, + Windows30Latin1 = 2000, + Windows31Latin1 = 2001, + Windows31Latin2 = 2002, + Windows31Latin5 = 2003, + HPRoman8 = 2004, + AdobeStandardEncoding = 2005, + VenturaUS = 2006, + VenturaInternational = 2007, + DECMCS = 2008, + PC850Multilingual = 2009, + PC8DanishNorwegian = 2012, + PC862LatinHebrew = 2013, + PC8Turkish = 2014, + IBMSymbols = 2015, + IBMThai = 2016, + HPLegal = 2017, + HPPiFont = 2018, + HPMath8 = 2019, + HPPSMath = 2020, + HPDesktop = 2021, + VenturaMath = 2022, + MicrosoftPublishing = 2023, + Windows31J = 2024, + GB2312 = 2025, + Big5 = 2026, + Macintosh = 2027, + IBM037 = 2028, + IBM038 = 2029, + IBM273 = 2030, + IBM274 = 2031, + IBM275 = 2032, + IBM277 = 2033, + IBM278 = 2034, + IBM280 = 2035, + IBM281 = 2036, + IBM284 = 2037, + IBM285 = 2038, + IBM290 = 2039, + IBM297 = 2040, + IBM420 = 2041, + IBM423 = 2042, + IBM424 = 2043, + PC8CodePage437 = 2011, + IBM500 = 2044, + 
IBM851 = 2045, + PCp852 = 2010, + IBM855 = 2046, + IBM857 = 2047, + IBM860 = 2048, + IBM861 = 2049, + IBM863 = 2050, + IBM864 = 2051, + IBM865 = 2052, + IBM868 = 2053, + IBM869 = 2054, + IBM870 = 2055, + IBM871 = 2056, + IBM880 = 2057, + IBM891 = 2058, + IBM903 = 2059, + IBM904 = 2060, + IBM905 = 2061, + IBM918 = 2062, + IBM1026 = 2063, + IBMEBCDICATDE = 2064, + EBCDICATDEA = 2065, + EBCDICCAFR = 2066, + EBCDICDKNO = 2067, + EBCDICDKNOA = 2068, + EBCDICFISE = 2069, + EBCDICFISEA = 2070, + EBCDICFR = 2071, + EBCDICIT = 2072, + EBCDICPT = 2073, + EBCDICES = 2074, + EBCDICESA = 2075, + EBCDICESS = 2076, + EBCDICUK = 2077, + EBCDICUS = 2078, + Unknown8BiT = 2079, + Mnemonic = 2080, + Mnem = 2081, + VISCII = 2082, + VIQR = 2083, + KOI8R = 2084, + HZGB2312 = 2085, + IBM866 = 2086, + PC775Baltic = 2087, + KOI8U = 2088, + IBM00858 = 2089, + IBM00924 = 2090, + IBM01140 = 2091, + IBM01141 = 2092, + IBM01142 = 2093, + IBM01143 = 2094, + IBM01144 = 2095, + IBM01145 = 2096, + IBM01146 = 2097, + IBM01147 = 2098, + IBM01148 = 2099, + IBM01149 = 2100, + Big5HKSCS = 2101, + IBM1047 = 2102, + PTCP154 = 2103, + Amiga1251 = 2104, + KOI7switched = 2105, + BRF = 2106, + TSCII = 2107, + CP51932 = 2108, + windows874 = 2109, + windows1250 = 2250, + windows1251 = 2251, + windows1252 = 2252, + windows1253 = 2253, + windows1254 = 2254, + windows1255 = 2255, + windows1256 = 2256, + windows1257 = 2257, + windows1258 = 2258, + TIS620 = 2259, + CP50220 = 2260 + }; +} +\end{codeblock} + +\begin{note} +The \tcode{text_encoding::id} enumeration +contains an enumerator for each known registered character encoding. +For each encoding, the corresponding enumerator is derived from +the alias beginning with ``\tcode{cs}'', as follows +\begin{itemize} +\item +\tcode{csUnicode} is mapped to \tcode{text_encoding::id::UCS2}, +\item +\tcode{csIBBM904} is mapped to \tcode{text_encoding::id::IBM904}, and +\item +the ``\tcode{cs}'' prefix is removed from other names. 
+\end{itemize} +\end{note} + +\rSec3[text.encoding.hash]{Hash support} + +\indexlibrarymember{hash}{text_encoding}% +\begin{itemdecl} +template<> struct hash<text_encoding>; +\end{itemdecl} + +\begin{itemdescr} +\pnum +The specialization is enabled\iref{unord.hash}. +\end{itemdescr} diff --git a/source/support.tex b/source/support.tex index 2d0eeb569c..5675f7dbff 100644 --- a/source/support.tex +++ b/source/support.tex @@ -758,6 +758,7 @@ #define @\defnlibxname{cpp_lib_string_udls}@ 201304L // also in \libheader{string} #define @\defnlibxname{cpp_lib_string_view}@ 201803L // also in \libheader{string}, \libheader{string_view} #define @\defnlibxname{cpp_lib_syncbuf}@ 201803L // also in \libheader{syncstream} +#define @\defnlibxname{cpp_lib_text_encoding}@ 202306L // also in \libheader{text_encoding} #define @\defnlibxname{cpp_lib_three_way_comparison}@ 201907L // freestanding, also in \libheader{compare} #define @\defnlibxname{cpp_lib_to_address}@ 201711L // freestanding, also in \libheader{memory} #define @\defnlibxname{cpp_lib_to_array}@ 201907L // also in \libheader{array} From 7798f19f1e65a84a95ae6e5fa424e1a0e0a990ed Mon Sep 17 00:00:00 2001 From: Dawn Perchik Date: Thu, 29 Jun 2023 16:23:03 -0700 Subject: [PATCH 2/3] [bibliography] Move the two IANA entries together. --- source/back.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/back.tex b/source/back.tex index c11c841111..f0b43a6430 100644 --- a/source/back.tex +++ b/source/back.tex @@ -22,13 +22,13 @@ \chapter{Bibliography} IANA Character Sets Database. Available from:\newline \url{https://www.iana.org/assignments/character-sets/}, 2021-04-01 +\item + IANA Time Zone Database. + Available from: \url{https://www.iana.org/time-zones} \item Unicode Character Mapping Markup Language [online]. Edited by Mark Davis and Markus Scherer. Revision 5.0.1; 2017-05-31 Available from: \url{http://www.unicode.org/reports/tr22/tr22-8.html} -\item - IANA Time Zone Database. 
- Available from: \url{https://www.iana.org/time-zones} % Literature references. \item Bjarne Stroustrup, From 3ca7c138e4c1de2487d3be1ce0de6414911b4e50 Mon Sep 17 00:00:00 2001 From: Dawn Perchik Date: Tue, 4 Jul 2023 12:19:25 -0700 Subject: [PATCH 3/3] [text.encoding.members] Use "static_assert" instead of "assert" in example. --- source/locales.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/locales.tex b/source/locales.tex index f9b9efa791..f66886e4b8 100644 --- a/source/locales.tex +++ b/source/locales.tex @@ -5004,10 +5004,10 @@ \begin{example} \begin{codeblock} -assert(@\exposid{comp-name}@("UTF-8", "utf8") == true); -assert(@\exposid{comp-name}@("u.t.f-008", "utf8") == true); -assert(@\exposid{comp-name}@("ut8", "utf8") == false); -assert(@\exposid{comp-name}@("utf-80", "utf8") == false); +static_assert(@\exposid{comp-name}@("UTF-8", "utf8") == true); +static_assert(@\exposid{comp-name}@("u.t.f-008", "utf8") == true); +static_assert(@\exposid{comp-name}@("ut8", "utf8") == false); +static_assert(@\exposid{comp-name}@("utf-80", "utf8") == false); \end{codeblock} \end{example} \end{itemdescr}