1+ use regex:: Regex ;
12use std:: collections:: BTreeSet as Set ;
23use std:: fs;
34use std:: io:: { self , Write } ;
45use std:: path:: Path ;
56use std:: process;
67
/// XID property data extracted from a Unicode Character Database file.
///
/// Values of this type are built by `parse_xid_properties`; the fields are
/// private and exposed through accessor methods.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Properties {
    /// Three-part Unicode version number read from the header of the data
    /// file, in the order the components appear there.
    unicode_version: (u64, u64, u64),
    /// Code point values (as `u32`) consulted by `is_xid_start`.
    xid_start: Set<u32>,
    /// Code point values (as `u32`) recorded for the `XID_Continue` property.
    xid_continue: Set<u32>,
}
1113
1214impl Properties {
15+ pub const fn unicode_version ( & self ) -> ( u64 , u64 , u64 ) {
16+ self . unicode_version
17+ }
18+
1319 pub fn is_xid_start ( & self , ch : char ) -> bool {
1420 self . xid_start . contains ( & ( ch as u32 ) )
1521 }
@@ -20,11 +26,6 @@ impl Properties {
2026}
2127
2228pub fn parse_xid_properties ( ucd_dir : & Path ) -> Properties {
23- let mut properties = Properties {
24- xid_start : Set :: new ( ) ,
25- xid_continue : Set :: new ( ) ,
26- } ;
27-
2829 let filename = "DerivedCoreProperties.txt" ;
2930 let path = ucd_dir. join ( filename) ;
3031 let contents = fs:: read_to_string ( path) . unwrap_or_else ( |err| {
@@ -34,6 +35,12 @@ pub fn parse_xid_properties(ucd_dir: &Path) -> Properties {
3435 process:: exit ( 1 ) ;
3536 } ) ;
3637
38+ let mut properties = Properties {
39+ unicode_version : parse_unicode_version ( filename, & contents) ,
40+ xid_start : Set :: new ( ) ,
41+ xid_continue : Set :: new ( ) ,
42+ } ;
43+
3744 for ( i, line) in contents. lines ( ) . enumerate ( ) {
3845 if line. starts_with ( '#' ) || line. trim ( ) . is_empty ( ) {
3946 continue ;
@@ -73,3 +80,24 @@ fn parse_line(line: &str) -> Option<(u32, u32, &str)> {
/// Parses `s` as a base-16 number.
///
/// Returns `Some(value)` when `s` is accepted by `u32::from_str_radix` with
/// radix 16, and `None` for any parse failure (empty input, non-hex digits,
/// or a value that does not fit in a `u32`).
fn parse_codepoint(s: &str) -> Option<u32> {
    const HEX_RADIX: u32 = 16;
    match u32::from_str_radix(s, HEX_RADIX) {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
83+
84+ fn parse_unicode_version ( filename : & str , contents : & str ) -> ( u64 , u64 , u64 ) {
85+ let ( name, extension) = filename
86+ . rsplit_once ( '.' )
87+ . expect ( "Failed to split file name into name and extension" ) ;
88+ let re = Regex :: new ( & format ! ( r"# {name}-(\d+).(\d+).(\d+).{extension}" ) ) . unwrap ( ) ;
89+ let caps = re
90+ . captures ( contents)
91+ . expect ( "Failed to find unicode version in unicode data" ) ;
92+ let v = caps
93+ . iter ( )
94+ . skip ( 1 )
95+ . map ( |s| {
96+ s. unwrap ( )
97+ . as_str ( )
98+ . parse ( )
99+ . expect ( "Failed to parse unicode version" )
100+ } )
101+ . collect :: < Vec < u64 > > ( ) ;
102+ ( v[ 0 ] , v[ 1 ] , v[ 2 ] )
103+ }
0 commit comments