From c6bcb037e67cf0e0fd25f45b333e6cab82b1aa9d Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Tue, 14 Jan 2025 14:15:07 -0800 Subject: [PATCH 1/9] Update Unicode generator data files to 16 --- .../Data/15/ScriptExtensions.txt | 628 -- .../Apple/DerivedCoreProperties.txt | 1009 ++- .../Data/{15 => 16}/Apple/UnicodeData.txt | 5214 +++++++++++++- .../Data/{15 => 16}/CaseFolding.txt | 38 +- .../Data/{15 => 16}/DerivedAge.txt | 76 +- .../{15 => 16}/DerivedBinaryProperties.txt | 12 +- .../Data/{15 => 16}/DerivedCoreProperties.txt | 1009 ++- .../{15 => 16}/DerivedGeneralCategory.txt | 214 +- .../Data/{15 => 16}/DerivedName.txt | 1207 +++- .../{15 => 16}/DerivedNormalizationProps.txt | 6319 ++++++++++++++++- .../Data/{15 => 16}/DerivedNumericType.txt | 29 +- .../Data/{15 => 16}/DerivedNumericValues.txt | 138 +- .../Data/{15 => 16}/GraphemeBreakProperty.txt | 96 +- .../Data/{15 => 16}/IndicSyllabicCategory.txt | 99 +- .../Data/{15 => 16}/NameAliases.txt | 13 +- .../Data/{15 => 16}/PropList.txt | 215 +- .../Data/16/ScriptExtensions.txt | 233 + .../Data/{15 => 16}/Scripts.txt | 159 +- .../Data/{15 => 16}/SpecialCasing.txt | 23 +- .../Data/{15 => 16}/UnicodeData.txt | 5210 +++++++++++++- .../Data/{15 => 16}/WordBreakProperty.txt | 96 +- .../Data/{15 => 16}/emoji-data.txt | 42 +- .../Output/Common/GraphemeData.h | 118 + 23 files changed, 21040 insertions(+), 1157 deletions(-) delete mode 100644 utils/gen-unicode-data/Data/15/ScriptExtensions.txt rename utils/gen-unicode-data/Data/{15 => 16}/Apple/DerivedCoreProperties.txt (93%) rename utils/gen-unicode-data/Data/{15 => 16}/Apple/UnicodeData.txt (87%) rename utils/gen-unicode-data/Data/{15 => 16}/CaseFolding.txt (98%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedAge.txt (96%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedBinaryProperties.txt (98%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedCoreProperties.txt (93%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedGeneralCategory.txt (96%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedName.txt (97%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedNormalizationProps.txt (61%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedNumericType.txt (93%) rename utils/gen-unicode-data/Data/{15 => 16}/DerivedNumericValues.txt (95%) rename utils/gen-unicode-data/Data/{15 => 16}/GraphemeBreakProperty.txt (95%) rename utils/gen-unicode-data/Data/{15 => 16}/IndicSyllabicCategory.txt (94%) rename utils/gen-unicode-data/Data/{15 => 16}/NameAliases.txt (97%) rename utils/gen-unicode-data/Data/{15 => 16}/PropList.txt (90%) create mode 100644 utils/gen-unicode-data/Data/16/ScriptExtensions.txt rename utils/gen-unicode-data/Data/{15 => 16}/Scripts.txt (96%) rename utils/gen-unicode-data/Data/{15 => 16}/SpecialCasing.txt (95%) rename utils/gen-unicode-data/Data/{15 => 16}/UnicodeData.txt (87%) rename utils/gen-unicode-data/Data/{15 => 16}/WordBreakProperty.txt (95%) rename utils/gen-unicode-data/Data/{15 => 16}/emoji-data.txt (98%) create mode 100644 utils/gen-unicode-data/Output/Common/GraphemeData.h diff --git a/utils/gen-unicode-data/Data/15/ScriptExtensions.txt b/utils/gen-unicode-data/Data/15/ScriptExtensions.txt deleted file mode 100644 index 2f5a1727e339e..0000000000000 --- a/utils/gen-unicode-data/Data/15/ScriptExtensions.txt +++ /dev/null @@ -1,628 +0,0 @@ -# ScriptExtensions-15.0.0.txt -# Date: 2022-02-02, 00:57:11 GMT -# © 2022 Unicode®, Inc. -# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html -# -# Unicode Character Database -# For documentation, see https://www.unicode.org/reports/tr44/ -# -# The Script_Extensions property indicates which characters are commonly used -# with more than one script, but with a limited number of scripts. -# For each code point, there is one or more property values. Each such value is a Script property value. -# For more information, see: -# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/ -# Especially the sections: -# https://www.unicode.org/reports/tr24/#Assignment_Script_Values -# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values -# -# Each Script_Extensions value in this file consists of a set -# of one or more abbreviated Script property values. The ordering of the -# values in that set is not material, but for stability in presentation -# it is given here as alphabetical. -# -# The Script_Extensions values are presented in sorted order in the file. -# They are sorted first by the number of Script property values in their sets, -# and then alphabetically by first differing Script property value. -# -# Following each distinct Script_Extensions value is the list of code -# points associated with that value, listed in code point order. -# -# All code points not explicitly listed for Script_Extensions -# have as their value the corresponding Script property value -# -# @missing: 0000..10FFFF;