52
52
#endif
53
53
#endif
54
54
55
- // ! Remove Warning "-Wmaybe-uninitialized" (GCC/Clang) resp. "C4703" (MSVC), since it is wrong for all cases in this file
55
+ // ! Determine the way to inform about fallthrough behavior
56
+ #if __cplusplus >= 201700L
57
+ #define TINY_UTF8_FALLTHROUGH [[fallthrough]];
58
+ #elif defined(__clang__)
59
+ // Clang does not warn about implicit fallthrough
60
+ #define TINY_UTF8_FALLTHROUGH
61
+ #elif defined(__GNUC__) && __GNUG__ > 6
62
+ #define TINY_UTF8_FALLTHROUGH [[gnu::fallthrough]];
63
+ #else
64
+ #define TINY_UTF8_FALLTHROUGH /* fall through */
65
+ #endif
66
+
67
+ // ! Suppress the following warnings, since they are wrong for all cases in this file
56
68
#if defined (__clang__)
57
- #pragma clang diagnostic ignored "-Wmaybe-uninitialized"
58
- #pragma clang diagnostic push
69
+ #pragma clang diagnostic push
70
+ // #pragma clang diagnostic ignored "-Wmaybe-uninitialized" // Clang is missing it. See https://bugs.llvm.org/show_bug.cgi?id=24979
59
71
#elif defined (__GNUC__)
60
- #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
61
- #pragma GCC diagnostic push
72
+ #pragma GCC diagnostic push
62
73
#elif defined (_MSC_VER)
63
- #pragma warning(push)
64
- #pragma warning(disable:4703)
74
+ #pragma warning(push)
75
+ #pragma warning(disable:4703) // Maybe uninitialized
76
+ #pragma warning(disable:26819) // Implicit Fallthrough
65
77
#endif
66
78
67
79
namespace tiny_utf8_detail
@@ -569,7 +581,7 @@ class utf8_string
569
581
case sizeof (std::uint16_t ): return *(const std::uint16_t *)iter;
570
582
case sizeof (std::uint32_t ): return *(const std::uint32_t *)iter;
571
583
}
572
- return *(const std::uint64_t *)iter;
584
+ return (size_type) *(const std::uint64_t *)iter;
573
585
}
574
586
static inline void set_lut ( char * iter , width_type lut_width , size_type value ){
575
587
switch ( lut_width ){
@@ -669,11 +681,11 @@ class utf8_string
669
681
*/
670
682
inline static void encode_utf8 ( value_type cp , char * dest , width_type cp_bytes ){
671
683
switch ( cp_bytes ){
672
- case 7 : dest[cp_bytes-6 ] = 0x80 | ((cp >> 30 ) & 0x3F ); [[fallthrough]];
673
- case 6 : dest[cp_bytes-5 ] = 0x80 | ((cp >> 24 ) & 0x3F ); [[fallthrough]];
674
- case 5 : dest[cp_bytes-4 ] = 0x80 | ((cp >> 18 ) & 0x3F ); [[fallthrough]];
675
- case 4 : dest[cp_bytes-3 ] = 0x80 | ((cp >> 12 ) & 0x3F ); [[fallthrough]];
676
- case 3 : dest[cp_bytes-2 ] = 0x80 | ((cp >> 6 ) & 0x3F ); [[fallthrough]];
684
+ case 7 : dest[cp_bytes-6 ] = 0x80 | ((cp >> 30 ) & 0x3F ); TINY_UTF8_FALLTHROUGH
685
+ case 6 : dest[cp_bytes-5 ] = 0x80 | ((cp >> 24 ) & 0x3F ); TINY_UTF8_FALLTHROUGH
686
+ case 5 : dest[cp_bytes-4 ] = 0x80 | ((cp >> 18 ) & 0x3F ); TINY_UTF8_FALLTHROUGH
687
+ case 4 : dest[cp_bytes-3 ] = 0x80 | ((cp >> 12 ) & 0x3F ); TINY_UTF8_FALLTHROUGH
688
+ case 3 : dest[cp_bytes-2 ] = 0x80 | ((cp >> 6 ) & 0x3F ); TINY_UTF8_FALLTHROUGH
677
689
case 2 : dest[cp_bytes-1 ] = 0x80 | ((cp >> 0 ) & 0x3F );
678
690
dest[0 ] = (unsigned char )( ( std::uint_least16_t (0xFF00uL) >> cp_bytes ) | ( cp >> ( 6 * cp_bytes - 6 ) ) );
679
691
break ;
@@ -833,7 +845,7 @@ class utf8_string
833
845
* @param cp The code point that the whole buffer will be set to
834
846
*/
835
847
explicit inline utf8_string ( value_type cp ) :
836
- t_sso( (size_type )( cp = encode_utf8( cp , t_sso.data ) ) )
848
+ t_sso( (unsigned char )( cp = encode_utf8( cp , t_sso.data ) ) )
837
849
{
838
850
t_sso.data [cp] = ' \0 ' ;
839
851
}
@@ -2072,7 +2084,7 @@ static inline bool operator<=( const utf8_string::const_reverse_iterator& lhs ,
2072
2084
2073
2085
// ! std::hash specialization
2074
2086
namespace std {
2075
- template <> class hash <utf8_string>{
2087
+ template <> struct hash <utf8_string>{
2076
2088
public:
2077
2089
size_t operator ()( const utf8_string& string ) const {
2078
2090
std::hash<char > hasher;
@@ -2153,7 +2165,7 @@ utf8_string::utf8_string( utf8_string::size_type count , utf8_string::value_type
2153
2165
buffer = t_sso.data ;
2154
2166
2155
2167
// Set Attributes
2156
- set_sso_data_len ( data_len );
2168
+ set_sso_data_len ( ( unsigned char ) data_len );
2157
2169
}
2158
2170
2159
2171
// Fill the buffer
@@ -2191,7 +2203,7 @@ utf8_string::utf8_string( utf8_string::size_type count , char cp ) :
2191
2203
}
2192
2204
else {
2193
2205
buffer = t_sso.data ;
2194
- set_sso_data_len ( count );
2206
+ set_sso_data_len ( ( unsigned char ) count );
2195
2207
}
2196
2208
2197
2209
// Fill the buffer
@@ -2246,15 +2258,15 @@ utf8_string::utf8_string( const char* str , size_type len , tiny_utf8_detail::re
2246
2258
width_type bytes = get_codepoint_bytes ( *str_iter , str_end - str_iter );
2247
2259
switch ( bytes )
2248
2260
{
2249
- case 7 : buffer_iter[6 ] = str_iter[6 ]; [[fallthrough]]; // Copy data byte
2250
- case 6 : buffer_iter[5 ] = str_iter[5 ]; [[fallthrough]]; // Copy data byte
2251
- case 5 : buffer_iter[4 ] = str_iter[4 ]; [[fallthrough]]; // Copy data byte
2252
- case 4 : buffer_iter[3 ] = str_iter[3 ]; [[fallthrough]]; // Copy data byte
2253
- case 3 : buffer_iter[2 ] = str_iter[2 ]; [[fallthrough]]; // Copy data byte
2261
+ case 7 : buffer_iter[6 ] = str_iter[6 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2262
+ case 6 : buffer_iter[5 ] = str_iter[5 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2263
+ case 5 : buffer_iter[4 ] = str_iter[4 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2264
+ case 4 : buffer_iter[3 ] = str_iter[3 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2265
+ case 3 : buffer_iter[2 ] = str_iter[2 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2254
2266
case 2 : buffer_iter[1 ] = str_iter[1 ]; // Copy data byte
2255
2267
// Set next entry in the LUT!
2256
2268
utf8_string::set_lut ( lut_iter -= lut_width , lut_width , str_iter - str );
2257
- [[fallthrough]];
2269
+ TINY_UTF8_FALLTHROUGH
2258
2270
case 1 : buffer_iter[0 ] = str_iter[0 ]; break ; // Copy data byte
2259
2271
}
2260
2272
buffer_iter += bytes;
@@ -2286,7 +2298,7 @@ utf8_string::utf8_string( const char* str , size_type len , tiny_utf8_detail::re
2286
2298
buffer = t_sso.data ;
2287
2299
2288
2300
// Set Attributes
2289
- set_sso_data_len ( data_len );
2301
+ set_sso_data_len ( ( unsigned char ) data_len );
2290
2302
2291
2303
// Set up LUT: Not necessary, since the LUT is automatically inactive,
2292
2304
// since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
@@ -2342,15 +2354,15 @@ utf8_string::utf8_string( const char* str , size_type data_len , tiny_utf8_detai
2342
2354
width_type bytes = get_codepoint_bytes ( *str_iter , str_end - str_iter );
2343
2355
switch ( bytes )
2344
2356
{
2345
- case 7 : buffer_iter[6 ] = str_iter[6 ]; [[fallthrough]]; // Copy data byte
2346
- case 6 : buffer_iter[5 ] = str_iter[5 ]; [[fallthrough]]; // Copy data byte
2347
- case 5 : buffer_iter[4 ] = str_iter[4 ]; [[fallthrough]]; // Copy data byte
2348
- case 4 : buffer_iter[3 ] = str_iter[3 ]; [[fallthrough]]; // Copy data byte
2349
- case 3 : buffer_iter[2 ] = str_iter[2 ]; [[fallthrough]]; // Copy data byte
2357
+ case 7 : buffer_iter[6 ] = str_iter[6 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2358
+ case 6 : buffer_iter[5 ] = str_iter[5 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2359
+ case 5 : buffer_iter[4 ] = str_iter[4 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2360
+ case 4 : buffer_iter[3 ] = str_iter[3 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2361
+ case 3 : buffer_iter[2 ] = str_iter[2 ]; TINY_UTF8_FALLTHROUGH // Copy data byte
2350
2362
case 2 : buffer_iter[1 ] = str_iter[1 ]; // Copy data byte
2351
2363
// Set next entry in the LUT!
2352
2364
utf8_string::set_lut ( lut_iter -= lut_width , lut_width , str_iter - str );
2353
- [[fallthrough]];
2365
+ TINY_UTF8_FALLTHROUGH
2354
2366
case 1 : buffer_iter[0 ] = str_iter[0 ]; break ; // Copy data byte
2355
2367
}
2356
2368
buffer_iter += bytes;
@@ -2382,7 +2394,7 @@ utf8_string::utf8_string( const char* str , size_type data_len , tiny_utf8_detai
2382
2394
buffer = t_sso.data ;
2383
2395
2384
2396
// Set Attributes
2385
- set_sso_data_len ( data_len );
2397
+ set_sso_data_len ( ( unsigned char ) data_len );
2386
2398
2387
2399
// Set up LUT: Not necessary, since the LUT is automatically inactive,
2388
2400
// since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
@@ -2470,7 +2482,7 @@ utf8_string::utf8_string( const value_type* str , size_type len ) :
2470
2482
buffer = t_sso.data ;
2471
2483
2472
2484
// Set Attributes
2473
- set_sso_data_len ( data_len );
2485
+ set_sso_data_len ( ( unsigned char ) data_len );
2474
2486
2475
2487
// Set up LUT: Not necessary, since the LUT is automatically inactive,
2476
2488
// since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
@@ -2495,27 +2507,27 @@ utf8_string::width_type utf8_string::get_num_bytes_of_utf8_char_before( const ch
2495
2507
default :
2496
2508
if ( ((unsigned char )data_start[-7 ] & 0xFE ) == 0xFC ) // 11111110 seven bytes
2497
2509
return 7 ;
2498
- [[fallthrough]];
2510
+ TINY_UTF8_FALLTHROUGH
2499
2511
case 6 :
2500
2512
if ( ((unsigned char )data_start[-6 ] & 0xFE ) == 0xFC ) // 1111110X six bytes
2501
2513
return 6 ;
2502
- [[fallthrough]];
2514
+ TINY_UTF8_FALLTHROUGH
2503
2515
case 5 :
2504
2516
if ( ((unsigned char )data_start[-5 ] & 0xFC ) == 0xF8 ) // 111110XX five bytes
2505
2517
return 5 ;
2506
- [[fallthrough]];
2518
+ TINY_UTF8_FALLTHROUGH
2507
2519
case 4 :
2508
2520
if ( ((unsigned char )data_start[-4 ] & 0xF8 ) == 0xF0 ) // 11110XXX four bytes
2509
2521
return 4 ;
2510
- [[fallthrough]];
2522
+ TINY_UTF8_FALLTHROUGH
2511
2523
case 3 :
2512
2524
if ( ((unsigned char )data_start[-3 ] & 0xF0 ) == 0xE0 ) // 1110XXXX three bytes
2513
2525
return 3 ;
2514
- [[fallthrough]];
2526
+ TINY_UTF8_FALLTHROUGH
2515
2527
case 2 :
2516
2528
if ( ((unsigned char )data_start[-2 ] & 0xE0 ) == 0xC0 ) // 110XXXXX two bytes
2517
2529
return 2 ;
2518
- [[fallthrough]];
2530
+ TINY_UTF8_FALLTHROUGH
2519
2531
case 1 :
2520
2532
case 0 :
2521
2533
return 1 ;
@@ -2615,7 +2627,7 @@ utf8_string& utf8_string::operator=( const utf8_string& str )
2615
2627
lbl_replicate_whole_buffer: // Replicate the whole buffer
2616
2628
delete[] t_non_sso.data ;
2617
2629
}
2618
- [[fallthrough]];
2630
+ TINY_UTF8_FALLTHROUGH
2619
2631
case 2 : // [sso-active] = [sso-inactive]
2620
2632
t_non_sso.data = new char [ utf8_string::determine_total_buffer_size ( str.t_non_sso .buffer_size ) ];
2621
2633
std::memcpy ( t_non_sso.data , str.t_non_sso .data , str.t_non_sso .buffer_size + sizeof (indicator_type) ); // Copy data
@@ -2625,7 +2637,7 @@ utf8_string& utf8_string::operator=( const utf8_string& str )
2625
2637
return *this ;
2626
2638
case 1 : // [sso-inactive] = [sso-active]
2627
2639
delete[] t_non_sso.data ;
2628
- [[fallthrough]];
2640
+ TINY_UTF8_FALLTHROUGH
2629
2641
case 0 : // [sso-active] = [sso-active]
2630
2642
if ( &str != this )
2631
2643
std::memcpy ( (void *)this , &str , sizeof (utf8_string) ); // Copy data
@@ -2661,7 +2673,7 @@ void utf8_string::shrink_to_fit()
2661
2673
2662
2674
// Allocate new buffer
2663
2675
t_non_sso.data = new char [ determine_total_buffer_size ( required_buffer_size ) ];
2664
- size_type old_lut_width = utf8_string::get_lut_width ( buffer_size );
2676
+ width_type old_lut_width = utf8_string::get_lut_width ( buffer_size );
2665
2677
char * new_lut_base_ptr = utf8_string::get_lut_base_ptr ( t_non_sso.data , required_buffer_size );
2666
2678
2667
2679
// Does the data type width change?
@@ -2953,13 +2965,14 @@ utf8_string utf8_string::raw_substr( size_type index , size_type byte_count ) co
2953
2965
if ( byte_count <= utf8_string::get_sso_capacity () )
2954
2966
{
2955
2967
utf8_string result;
2956
- if ( byte_count < utf8_string::get_sso_capacity () )
2957
- result.set_sso_data_len ( byte_count ); // Set length
2958
2968
2959
2969
// Copy data
2960
2970
std::memcpy ( result.t_sso .data , get_buffer () + index , byte_count );
2961
2971
result.t_sso .data [byte_count] = ' \0 ' ;
2962
2972
2973
+ // Set length
2974
+ result.set_sso_data_len ( (unsigned char )byte_count );
2975
+
2963
2976
return result;
2964
2977
}
2965
2978
@@ -3084,7 +3097,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
3084
3097
if ( new_data_len <= utf8_string::get_sso_capacity () ){
3085
3098
std::memcpy ( t_sso.data + old_data_len , app.t_sso .data , app_data_len ); // Copy APPENDIX (must have sso active as well)
3086
3099
t_sso.data [new_data_len] = 0 ; // Trailing '\0'
3087
- set_sso_data_len ( new_data_len ); // Adjust size
3100
+ set_sso_data_len ( ( unsigned char ) new_data_len ); // Adjust size
3088
3101
return *this ;
3089
3102
}
3090
3103
@@ -3203,7 +3216,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
3203
3216
char * lut_dest_iter = old_lut_base_ptr - old_lut_len * new_lut_width;
3204
3217
if ( app_lut_active )
3205
3218
{
3206
- size_type app_lut_width = utf8_string::get_lut_width ( app_buffer_size );
3219
+ width_type app_lut_width = utf8_string::get_lut_width ( app_buffer_size );
3207
3220
const char * app_lut_iter = app_lut_base_ptr;
3208
3221
while ( app_lut_len-- > 0 )
3209
3222
utf8_string::set_lut (
@@ -3252,7 +3265,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
3252
3265
// Reuse indices from old lut?
3253
3266
if ( old_lut_active )
3254
3267
{
3255
- size_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
3268
+ width_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
3256
3269
3257
3270
// Copy all old INDICES
3258
3271
if ( new_lut_width != old_lut_width )
@@ -3290,7 +3303,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
3290
3303
char * lut_dest_iter = new_lut_base_ptr - old_lut_len * new_lut_width;
3291
3304
if ( app_lut_active )
3292
3305
{
3293
- size_type app_lut_width = utf8_string::get_lut_width ( app_buffer_size );
3306
+ width_type app_lut_width = utf8_string::get_lut_width ( app_buffer_size );
3294
3307
const char * app_lut_iter = app_lut_base_ptr;
3295
3308
while ( app_lut_len-- > 0 )
3296
3309
utf8_string::set_lut (
@@ -3358,7 +3371,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
3358
3371
3359
3372
// Finish the new string object
3360
3373
t_sso.data [new_data_len] = 0 ; // Trailing '\0'
3361
- set_sso_data_len ( new_data_len );
3374
+ set_sso_data_len ( ( unsigned char ) new_data_len );
3362
3375
3363
3376
return *this ;
3364
3377
}
@@ -3534,7 +3547,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
3534
3547
char * lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
3535
3548
if ( str_lut_active )
3536
3549
{
3537
- size_type str_lut_width = utf8_string::get_lut_width ( str_buffer_size );
3550
+ width_type str_lut_width = utf8_string::get_lut_width ( str_buffer_size );
3538
3551
const char * str_lut_iter = str_lut_base_ptr;
3539
3552
while ( str_lut_len-- > 0 )
3540
3553
utf8_string::set_lut (
@@ -3588,7 +3601,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
3588
3601
// Reuse indices from old lut?
3589
3602
if ( old_lut_active )
3590
3603
{
3591
- size_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
3604
+ width_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
3592
3605
3593
3606
// Copy all INDICES BEFORE the insertion
3594
3607
if ( new_lut_width != old_lut_width )
@@ -3648,7 +3661,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
3648
3661
char * lut_dest_iter = new_lut_base_ptr - mb_index * new_lut_width;
3649
3662
if ( str_lut_active )
3650
3663
{
3651
- size_type str_lut_width = utf8_string::get_lut_width ( str_buffer_size );
3664
+ width_type str_lut_width = utf8_string::get_lut_width ( str_buffer_size );
3652
3665
const char * str_lut_iter = str_lut_base_ptr;
3653
3666
while ( str_lut_len-- > 0 )
3654
3667
utf8_string::set_lut (
@@ -3933,7 +3946,7 @@ utf8_string& utf8_string::raw_replace( size_type index , size_type replaced_len
3933
3946
char * lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
3934
3947
if ( repl_lut_active )
3935
3948
{
3936
- size_type repl_lut_width = utf8_string::get_lut_width ( repl_buffer_size );
3949
+ width_type repl_lut_width = utf8_string::get_lut_width ( repl_buffer_size );
3937
3950
const char * repl_lut_iter = repl_lut_base_ptr;
3938
3951
while ( repl_lut_len-- > 0 )
3939
3952
utf8_string::set_lut (
@@ -3991,7 +4004,7 @@ utf8_string& utf8_string::raw_replace( size_type index , size_type replaced_len
3991
4004
if ( old_lut_active )
3992
4005
{
3993
4006
size_type mb_end_index = mb_index + replaced_mbs;
3994
- size_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
4007
+ width_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
3995
4008
3996
4009
// Copy all INDICES BEFORE the replacement
3997
4010
if ( new_lut_width != old_lut_width )
@@ -4148,7 +4161,7 @@ utf8_string& utf8_string::raw_erase( size_type index , size_type len )
4148
4161
4149
4162
// Finish the new string object
4150
4163
t_sso.data [new_data_len] = 0 ; // Trailing '\0'
4151
- set_sso_data_len ( new_data_len );
4164
+ set_sso_data_len ( ( unsigned char ) new_data_len );
4152
4165
4153
4166
return *this ;
4154
4167
}
@@ -4170,7 +4183,7 @@ utf8_string& utf8_string::raw_erase( size_type index , size_type len )
4170
4183
if ( old_lut_active )
4171
4184
{
4172
4185
size_type old_lut_len = utf8_string::get_lut_len ( old_lut_base_ptr );
4173
- size_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
4186
+ width_type old_lut_width = utf8_string::get_lut_width ( old_buffer_size );
4174
4187
size_type mb_end_index = 0 ;
4175
4188
size_type replaced_mbs = 0 ;
4176
4189
size_type iter = 0 ;
0 commit comments