From 71fbc352f60a457ad6ed6758287cda9077d12f46 Mon Sep 17 00:00:00 2001
From: Jens Maurer <Jens.Maurer@gmx.net>
Date: Tue, 9 Nov 2021 17:49:27 +0100
Subject: [PATCH] [lex.charset] Define 'valid encoding'

and use the term for execution character sets introduced
in [character.seq].
---
 source/lex.tex       | 26 ++++++++++++++++++--------
 source/lib-intro.tex |  7 ++++---
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/source/lex.tex b/source/lex.tex
index 210a116c69..d5e997feaf 100644
--- a/source/lex.tex
+++ b/source/lex.tex
@@ -346,9 +346,11 @@
 to a wide character or string literal.
 
 \pnum
-A literal encoding or a locale-specific encoding of one of
-the execution character sets\iref{character.seq}
-encodes each element of the basic literal character set as
+An encoding is \defnx{valid}{encoding!valid} if all of the following
+conditions are satisfied:
+\begin{itemize}
+\item
+Each element of the basic literal character set is encoded as
 a single code unit with non-negative value,
 distinct from the code unit for any other such element.
 \begin{note}
@@ -357,15 +359,20 @@
 the value of such a code unit can be the same as
 that of a code unit for an element of the basic literal character set.
 \end{note}
+\item
 \indextext{character!null}%
 \indextext{wide-character!null}%
-The \unicode{0000}{null} character is encoded as the value \tcode{0}.
-No other element of the translation character set
+The \unicode{0000}{null} character is encoded as the value \tcode{0};
+no other element of the translation character set
 is encoded with a code unit of value \tcode{0}.
+\item
 The code unit value of each decimal digit character after the digit \tcode{0} (\ucode{0030})
-shall be one greater than the value of the previous.
-The ordinary and wide literal encodings are otherwise
-\impldef{ordinary and wide literal encodings}.
+is one greater than the value of the previous.
+\end{itemize}
+
+\pnum
+The ordinary and wide literal encodings are valid encodings,
+but are otherwise \impldef{ordinary and wide literal encodings}.
 \indextext{UTF-8}%
 \indextext{UTF-16}%
 \indextext{UTF-32}%
@@ -373,6 +380,9 @@
 the UCS scalar value
 corresponding to each character of the translation character set
 is encoded as specified in ISO/IEC 10646 for the respective UCS encoding form.
+\begin{note}
+Those encodings are also valid encodings.
+\end{note}
 \indextext{character set|)}
 
 \rSec1[lex.pptoken]{Preprocessing tokens}
diff --git a/source/lib-intro.tex b/source/lib-intro.tex
index 5a7d9dcae5..7d9c1089d7 100644
--- a/source/lib-intro.tex
+++ b/source/lib-intro.tex
@@ -654,9 +654,10 @@
 \item
 The \defnadj{execution}{character set} and
 the \defnadj{execution}{wide-character set}
-are supersets of the basic literal character set\iref{lex.charset}.
-The encodings of the execution character sets and
-the sets of additional elements (if any) are locale-specific.
+are supersets of the basic literal character set.
+The sets of additional elements (if any) are locale-specific.
+The encodings of the execution character sets are locale-specific,
+but valid\iref{lex.charset}.
 \begin{note}
 The encodings of the execution character sets can be unrelated
 to any literal encoding.