Skip to content

Commit c40c1a6

Browse files
authored
Fixed several warnings (#41)
Related to: wrong use of `size_type` where `width_type` would be appropriate; a missing cast from `size_type` to `unsigned char` for the SSO size type; use of `[[fallthrough]]` before C++17; and clang not implementing `-Wno-maybe-uninitialized`.
1 parent 274f5eb commit c40c1a6

File tree

1 file changed

+68
-55
lines changed

1 file changed

+68
-55
lines changed

tinyutf8.h

Lines changed: 68 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,28 @@
5252
#endif
5353
#endif
5454

55-
//! Remove Warning "-Wmaybe-uninitialized" (GCC/Clang) resp. "C4703" (MSVC), since it is wrong for all cases in this file
55+
//! Determine the way to inform about fallthrough behavior
56+
#if __cplusplus >= 201700L
57+
#define TINY_UTF8_FALLTHROUGH [[fallthrough]];
58+
#elif defined(__clang__)
59+
// Clang does not warn about implicit fallthrough
60+
#define TINY_UTF8_FALLTHROUGH
61+
#elif defined(__GNUC__) && __GNUG__ > 6
62+
#define TINY_UTF8_FALLTHROUGH [[gnu::fallthrough]];
63+
#else
64+
#define TINY_UTF8_FALLTHROUGH /* fall through */
65+
#endif
66+
67+
//! Suppress warnings, since they are wrong for all cases in this file
5668
#if defined (__clang__)
57-
#pragma clang diagnostic ignored "-Wmaybe-uninitialized"
58-
#pragma clang diagnostic push
69+
#pragma clang diagnostic push
70+
// #pragma clang diagnostic ignored "-Wmaybe-uninitialized" // Clang is missing it. See https://bugs.llvm.org/show_bug.cgi?id=24979
5971
#elif defined (__GNUC__)
60-
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
61-
#pragma GCC diagnostic push
72+
#pragma GCC diagnostic push
6273
#elif defined (_MSC_VER)
63-
#pragma warning(push)
64-
#pragma warning(disable:4703)
74+
#pragma warning(push)
75+
#pragma warning(disable:4703) // Maybe uninitialized
76+
#pragma warning(disable:26819) // Implicit Fallthrough
6577
#endif
6678

6779
namespace tiny_utf8_detail
@@ -569,7 +581,7 @@ class utf8_string
569581
case sizeof(std::uint16_t): return *(const std::uint16_t*)iter;
570582
case sizeof(std::uint32_t): return *(const std::uint32_t*)iter;
571583
}
572-
return *(const std::uint64_t*)iter;
584+
return (size_type)*(const std::uint64_t*)iter;
573585
}
574586
static inline void set_lut( char* iter , width_type lut_width , size_type value ){
575587
switch( lut_width ){
@@ -669,11 +681,11 @@ class utf8_string
669681
*/
670682
inline static void encode_utf8( value_type cp , char* dest , width_type cp_bytes ){
671683
switch( cp_bytes ){
672-
case 7: dest[cp_bytes-6] = 0x80 | ((cp >> 30) & 0x3F); [[fallthrough]];
673-
case 6: dest[cp_bytes-5] = 0x80 | ((cp >> 24) & 0x3F); [[fallthrough]];
674-
case 5: dest[cp_bytes-4] = 0x80 | ((cp >> 18) & 0x3F); [[fallthrough]];
675-
case 4: dest[cp_bytes-3] = 0x80 | ((cp >> 12) & 0x3F); [[fallthrough]];
676-
case 3: dest[cp_bytes-2] = 0x80 | ((cp >> 6) & 0x3F); [[fallthrough]];
684+
case 7: dest[cp_bytes-6] = 0x80 | ((cp >> 30) & 0x3F); TINY_UTF8_FALLTHROUGH
685+
case 6: dest[cp_bytes-5] = 0x80 | ((cp >> 24) & 0x3F); TINY_UTF8_FALLTHROUGH
686+
case 5: dest[cp_bytes-4] = 0x80 | ((cp >> 18) & 0x3F); TINY_UTF8_FALLTHROUGH
687+
case 4: dest[cp_bytes-3] = 0x80 | ((cp >> 12) & 0x3F); TINY_UTF8_FALLTHROUGH
688+
case 3: dest[cp_bytes-2] = 0x80 | ((cp >> 6) & 0x3F); TINY_UTF8_FALLTHROUGH
677689
case 2: dest[cp_bytes-1] = 0x80 | ((cp >> 0) & 0x3F);
678690
dest[0] = (unsigned char)( ( std::uint_least16_t(0xFF00uL) >> cp_bytes ) | ( cp >> ( 6 * cp_bytes - 6 ) ) );
679691
break;
@@ -833,7 +845,7 @@ class utf8_string
833845
* @param cp The code point that the whole buffer will be set to
834846
*/
835847
explicit inline utf8_string( value_type cp ) :
836-
t_sso( (size_type)( cp = encode_utf8( cp , t_sso.data ) ) )
848+
t_sso( (unsigned char)( cp = encode_utf8( cp , t_sso.data ) ) )
837849
{
838850
t_sso.data[cp] = '\0';
839851
}
@@ -2072,7 +2084,7 @@ static inline bool operator<=( const utf8_string::const_reverse_iterator& lhs ,
20722084

20732085
//! std::hash specialization
20742086
namespace std{
2075-
template<> class hash<utf8_string>{
2087+
template<> struct hash<utf8_string>{
20762088
public:
20772089
size_t operator()( const utf8_string& string ) const {
20782090
std::hash<char> hasher;
@@ -2153,7 +2165,7 @@ utf8_string::utf8_string( utf8_string::size_type count , utf8_string::value_type
21532165
buffer = t_sso.data;
21542166

21552167
// Set Attributes
2156-
set_sso_data_len( data_len );
2168+
set_sso_data_len( (unsigned char)data_len );
21572169
}
21582170

21592171
// Fill the buffer
@@ -2191,7 +2203,7 @@ utf8_string::utf8_string( utf8_string::size_type count , char cp ) :
21912203
}
21922204
else{
21932205
buffer = t_sso.data;
2194-
set_sso_data_len( count );
2206+
set_sso_data_len( (unsigned char)count );
21952207
}
21962208

21972209
// Fill the buffer
@@ -2246,15 +2258,15 @@ utf8_string::utf8_string( const char* str , size_type len , tiny_utf8_detail::re
22462258
width_type bytes = get_codepoint_bytes( *str_iter , str_end - str_iter );
22472259
switch( bytes )
22482260
{
2249-
case 7: buffer_iter[6] = str_iter[6]; [[fallthrough]]; // Copy data byte
2250-
case 6: buffer_iter[5] = str_iter[5]; [[fallthrough]]; // Copy data byte
2251-
case 5: buffer_iter[4] = str_iter[4]; [[fallthrough]]; // Copy data byte
2252-
case 4: buffer_iter[3] = str_iter[3]; [[fallthrough]]; // Copy data byte
2253-
case 3: buffer_iter[2] = str_iter[2]; [[fallthrough]]; // Copy data byte
2261+
case 7: buffer_iter[6] = str_iter[6]; TINY_UTF8_FALLTHROUGH // Copy data byte
2262+
case 6: buffer_iter[5] = str_iter[5]; TINY_UTF8_FALLTHROUGH // Copy data byte
2263+
case 5: buffer_iter[4] = str_iter[4]; TINY_UTF8_FALLTHROUGH // Copy data byte
2264+
case 4: buffer_iter[3] = str_iter[3]; TINY_UTF8_FALLTHROUGH // Copy data byte
2265+
case 3: buffer_iter[2] = str_iter[2]; TINY_UTF8_FALLTHROUGH // Copy data byte
22542266
case 2: buffer_iter[1] = str_iter[1]; // Copy data byte
22552267
// Set next entry in the LUT!
22562268
utf8_string::set_lut( lut_iter -= lut_width , lut_width , str_iter - str );
2257-
[[fallthrough]];
2269+
TINY_UTF8_FALLTHROUGH
22582270
case 1: buffer_iter[0] = str_iter[0]; break; // Copy data byte
22592271
}
22602272
buffer_iter += bytes;
@@ -2286,7 +2298,7 @@ utf8_string::utf8_string( const char* str , size_type len , tiny_utf8_detail::re
22862298
buffer = t_sso.data;
22872299

22882300
// Set Attributes
2289-
set_sso_data_len( data_len );
2301+
set_sso_data_len( (unsigned char)data_len );
22902302

22912303
// Set up LUT: Not necessary, since the LUT is automatically inactive,
22922304
// since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
@@ -2342,15 +2354,15 @@ utf8_string::utf8_string( const char* str , size_type data_len , tiny_utf8_detai
23422354
width_type bytes = get_codepoint_bytes( *str_iter , str_end - str_iter );
23432355
switch( bytes )
23442356
{
2345-
case 7: buffer_iter[6] = str_iter[6]; [[fallthrough]]; // Copy data byte
2346-
case 6: buffer_iter[5] = str_iter[5]; [[fallthrough]]; // Copy data byte
2347-
case 5: buffer_iter[4] = str_iter[4]; [[fallthrough]]; // Copy data byte
2348-
case 4: buffer_iter[3] = str_iter[3]; [[fallthrough]]; // Copy data byte
2349-
case 3: buffer_iter[2] = str_iter[2]; [[fallthrough]]; // Copy data byte
2357+
case 7: buffer_iter[6] = str_iter[6]; TINY_UTF8_FALLTHROUGH // Copy data byte
2358+
case 6: buffer_iter[5] = str_iter[5]; TINY_UTF8_FALLTHROUGH // Copy data byte
2359+
case 5: buffer_iter[4] = str_iter[4]; TINY_UTF8_FALLTHROUGH // Copy data byte
2360+
case 4: buffer_iter[3] = str_iter[3]; TINY_UTF8_FALLTHROUGH // Copy data byte
2361+
case 3: buffer_iter[2] = str_iter[2]; TINY_UTF8_FALLTHROUGH // Copy data byte
23502362
case 2: buffer_iter[1] = str_iter[1]; // Copy data byte
23512363
// Set next entry in the LUT!
23522364
utf8_string::set_lut( lut_iter -= lut_width , lut_width , str_iter - str );
2353-
[[fallthrough]];
2365+
TINY_UTF8_FALLTHROUGH
23542366
case 1: buffer_iter[0] = str_iter[0]; break; // Copy data byte
23552367
}
23562368
buffer_iter += bytes;
@@ -2382,7 +2394,7 @@ utf8_string::utf8_string( const char* str , size_type data_len , tiny_utf8_detai
23822394
buffer = t_sso.data;
23832395

23842396
// Set Attributes
2385-
set_sso_data_len( data_len );
2397+
set_sso_data_len( (unsigned char)data_len );
23862398

23872399
// Set up LUT: Not necessary, since the LUT is automatically inactive,
23882400
// since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
@@ -2470,7 +2482,7 @@ utf8_string::utf8_string( const value_type* str , size_type len ) :
24702482
buffer = t_sso.data;
24712483

24722484
// Set Attributes
2473-
set_sso_data_len( data_len );
2485+
set_sso_data_len( (unsigned char)data_len );
24742486

24752487
// Set up LUT: Not necessary, since the LUT is automatically inactive,
24762488
// since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
@@ -2495,27 +2507,27 @@ utf8_string::width_type utf8_string::get_num_bytes_of_utf8_char_before( const ch
24952507
default:
24962508
if( ((unsigned char)data_start[-7] & 0xFE ) == 0xFC ) // 11111110 seven bytes
24972509
return 7;
2498-
[[fallthrough]];
2510+
TINY_UTF8_FALLTHROUGH
24992511
case 6:
25002512
if( ((unsigned char)data_start[-6] & 0xFE ) == 0xFC ) // 1111110X six bytes
25012513
return 6;
2502-
[[fallthrough]];
2514+
TINY_UTF8_FALLTHROUGH
25032515
case 5:
25042516
if( ((unsigned char)data_start[-5] & 0xFC ) == 0xF8 ) // 111110XX five bytes
25052517
return 5;
2506-
[[fallthrough]];
2518+
TINY_UTF8_FALLTHROUGH
25072519
case 4:
25082520
if( ((unsigned char)data_start[-4] & 0xF8 ) == 0xF0 ) // 11110XXX four bytes
25092521
return 4;
2510-
[[fallthrough]];
2522+
TINY_UTF8_FALLTHROUGH
25112523
case 3:
25122524
if( ((unsigned char)data_start[-3] & 0xF0 ) == 0xE0 ) // 1110XXXX three bytes
25132525
return 3;
2514-
[[fallthrough]];
2526+
TINY_UTF8_FALLTHROUGH
25152527
case 2:
25162528
if( ((unsigned char)data_start[-2] & 0xE0 ) == 0xC0 ) // 110XXXXX two bytes
25172529
return 2;
2518-
[[fallthrough]];
2530+
TINY_UTF8_FALLTHROUGH
25192531
case 1:
25202532
case 0:
25212533
return 1;
@@ -2615,7 +2627,7 @@ utf8_string& utf8_string::operator=( const utf8_string& str )
26152627
lbl_replicate_whole_buffer: // Replicate the whole buffer
26162628
delete[] t_non_sso.data;
26172629
}
2618-
[[fallthrough]];
2630+
TINY_UTF8_FALLTHROUGH
26192631
case 2: // [sso-active] = [sso-inactive]
26202632
t_non_sso.data = new char[ utf8_string::determine_total_buffer_size( str.t_non_sso.buffer_size ) ];
26212633
std::memcpy( t_non_sso.data , str.t_non_sso.data , str.t_non_sso.buffer_size + sizeof(indicator_type) ); // Copy data
@@ -2625,7 +2637,7 @@ utf8_string& utf8_string::operator=( const utf8_string& str )
26252637
return *this;
26262638
case 1: // [sso-inactive] = [sso-active]
26272639
delete[] t_non_sso.data;
2628-
[[fallthrough]];
2640+
TINY_UTF8_FALLTHROUGH
26292641
case 0: // [sso-active] = [sso-active]
26302642
if( &str != this )
26312643
std::memcpy( (void*)this , &str , sizeof(utf8_string) ); // Copy data
@@ -2661,7 +2673,7 @@ void utf8_string::shrink_to_fit()
26612673

26622674
// Allocate new buffer
26632675
t_non_sso.data = new char[ determine_total_buffer_size( required_buffer_size ) ];
2664-
size_type old_lut_width = utf8_string::get_lut_width( buffer_size );
2676+
width_type old_lut_width = utf8_string::get_lut_width( buffer_size );
26652677
char* new_lut_base_ptr = utf8_string::get_lut_base_ptr( t_non_sso.data , required_buffer_size );
26662678

26672679
// Does the data type width change?
@@ -2953,13 +2965,14 @@ utf8_string utf8_string::raw_substr( size_type index , size_type byte_count ) co
29532965
if( byte_count <= utf8_string::get_sso_capacity() )
29542966
{
29552967
utf8_string result;
2956-
if( byte_count < utf8_string::get_sso_capacity() )
2957-
result.set_sso_data_len( byte_count ); // Set length
29582968

29592969
// Copy data
29602970
std::memcpy( result.t_sso.data , get_buffer() + index , byte_count );
29612971
result.t_sso.data[byte_count] = '\0';
29622972

2973+
// Set length
2974+
result.set_sso_data_len( (unsigned char)byte_count );
2975+
29632976
return result;
29642977
}
29652978

@@ -3084,7 +3097,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
30843097
if( new_data_len <= utf8_string::get_sso_capacity() ){
30853098
std::memcpy( t_sso.data + old_data_len , app.t_sso.data , app_data_len ); // Copy APPENDIX (must have sso active as well)
30863099
t_sso.data[new_data_len] = 0; // Trailing '\0'
3087-
set_sso_data_len( new_data_len ); // Adjust size
3100+
set_sso_data_len( (unsigned char)new_data_len ); // Adjust size
30883101
return *this;
30893102
}
30903103

@@ -3203,7 +3216,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
32033216
char* lut_dest_iter = old_lut_base_ptr - old_lut_len * new_lut_width;
32043217
if( app_lut_active )
32053218
{
3206-
size_type app_lut_width = utf8_string::get_lut_width( app_buffer_size );
3219+
width_type app_lut_width = utf8_string::get_lut_width( app_buffer_size );
32073220
const char* app_lut_iter = app_lut_base_ptr;
32083221
while( app_lut_len-- > 0 )
32093222
utf8_string::set_lut(
@@ -3252,7 +3265,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
32523265
// Reuse indices from old lut?
32533266
if( old_lut_active )
32543267
{
3255-
size_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
3268+
width_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
32563269

32573270
// Copy all old INDICES
32583271
if( new_lut_width != old_lut_width )
@@ -3290,7 +3303,7 @@ utf8_string& utf8_string::append( const utf8_string& app )
32903303
char* lut_dest_iter = new_lut_base_ptr - old_lut_len * new_lut_width;
32913304
if( app_lut_active )
32923305
{
3293-
size_type app_lut_width = utf8_string::get_lut_width( app_buffer_size );
3306+
width_type app_lut_width = utf8_string::get_lut_width( app_buffer_size );
32943307
const char* app_lut_iter = app_lut_base_ptr;
32953308
while( app_lut_len-- > 0 )
32963309
utf8_string::set_lut(
@@ -3358,7 +3371,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
33583371

33593372
// Finish the new string object
33603373
t_sso.data[new_data_len] = 0; // Trailing '\0'
3361-
set_sso_data_len( new_data_len );
3374+
set_sso_data_len( (unsigned char)new_data_len );
33623375

33633376
return *this;
33643377
}
@@ -3534,7 +3547,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
35343547
char* lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
35353548
if( str_lut_active )
35363549
{
3537-
size_type str_lut_width = utf8_string::get_lut_width( str_buffer_size );
3550+
width_type str_lut_width = utf8_string::get_lut_width( str_buffer_size );
35383551
const char* str_lut_iter = str_lut_base_ptr;
35393552
while( str_lut_len-- > 0 )
35403553
utf8_string::set_lut(
@@ -3588,7 +3601,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
35883601
// Reuse indices from old lut?
35893602
if( old_lut_active )
35903603
{
3591-
size_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
3604+
width_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
35923605

35933606
// Copy all INDICES BEFORE the insertion
35943607
if( new_lut_width != old_lut_width )
@@ -3648,7 +3661,7 @@ utf8_string& utf8_string::raw_insert( size_type index , const utf8_string& str )
36483661
char* lut_dest_iter = new_lut_base_ptr - mb_index * new_lut_width;
36493662
if( str_lut_active )
36503663
{
3651-
size_type str_lut_width = utf8_string::get_lut_width( str_buffer_size );
3664+
width_type str_lut_width = utf8_string::get_lut_width( str_buffer_size );
36523665
const char* str_lut_iter = str_lut_base_ptr;
36533666
while( str_lut_len-- > 0 )
36543667
utf8_string::set_lut(
@@ -3933,7 +3946,7 @@ utf8_string& utf8_string::raw_replace( size_type index , size_type replaced_len
39333946
char* lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
39343947
if( repl_lut_active )
39353948
{
3936-
size_type repl_lut_width = utf8_string::get_lut_width( repl_buffer_size );
3949+
width_type repl_lut_width = utf8_string::get_lut_width( repl_buffer_size );
39373950
const char* repl_lut_iter = repl_lut_base_ptr;
39383951
while( repl_lut_len-- > 0 )
39393952
utf8_string::set_lut(
@@ -3991,7 +4004,7 @@ utf8_string& utf8_string::raw_replace( size_type index , size_type replaced_len
39914004
if( old_lut_active )
39924005
{
39934006
size_type mb_end_index = mb_index + replaced_mbs;
3994-
size_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
4007+
width_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
39954008

39964009
// Copy all INDICES BEFORE the replacement
39974010
if( new_lut_width != old_lut_width )
@@ -4148,7 +4161,7 @@ utf8_string& utf8_string::raw_erase( size_type index , size_type len )
41484161

41494162
// Finish the new string object
41504163
t_sso.data[new_data_len] = 0; // Trailing '\0'
4151-
set_sso_data_len( new_data_len );
4164+
set_sso_data_len( (unsigned char)new_data_len );
41524165

41534166
return *this;
41544167
}
@@ -4170,7 +4183,7 @@ utf8_string& utf8_string::raw_erase( size_type index , size_type len )
41704183
if( old_lut_active )
41714184
{
41724185
size_type old_lut_len = utf8_string::get_lut_len( old_lut_base_ptr );
4173-
size_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
4186+
width_type old_lut_width = utf8_string::get_lut_width( old_buffer_size );
41744187
size_type mb_end_index = 0;
41754188
size_type replaced_mbs = 0;
41764189
size_type iter = 0;

0 commit comments

Comments
 (0)