Skip to content

Commit 8ce2426

Browse files
Handle ANSI escape sequences when performing column wrapping (#2849)
This PR adds functionality to skip around ANSI escape sequences in catch_textflow, so that they do not contribute to line length and the line-wrapping code does not split an escape sequence in the middle. I've implemented this by creating an AnsiSkippingString abstraction with a bidirectional iterator that skips over escape sequences while iterating. Additionally, I refactored Column::const_iterator to be iterator-based rather than index-based, so this abstraction is a simple drop-in replacement for std::string. Currently only color sequences are handled; other escape sequences are left unaffected. Motivation: Text with ANSI color sequences gets messed up when being output by Catch2 (#2833).
1 parent fa5a53d commit 8ce2426

File tree

3 files changed

+489
-69
lines changed

3 files changed

+489
-69
lines changed

src/catch2/internal/catch_textflow.cpp

Lines changed: 169 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -26,117 +26,228 @@ namespace {
2626
return std::memchr( chars, c, sizeof( chars ) - 1 ) != nullptr;
2727
}
2828

29-
bool isBoundary( std::string const& line, size_t at ) {
30-
assert( at > 0 );
31-
assert( at <= line.size() );
32-
33-
return at == line.size() ||
34-
( isWhitespace( line[at] ) && !isWhitespace( line[at - 1] ) ) ||
35-
isBreakableBefore( line[at] ) ||
36-
isBreakableAfter( line[at - 1] );
37-
}
38-
3929
} // namespace
4030

4131
namespace Catch {
4232
namespace TextFlow {
33+
void AnsiSkippingString::preprocessString() {
34+
for ( auto it = m_string.begin(); it != m_string.end(); ) {
35+
// try to read through an ansi sequence
36+
while ( it != m_string.end() && *it == '\033' &&
37+
it + 1 != m_string.end() && *( it + 1 ) == '[' ) {
38+
auto cursor = it + 2;
39+
while ( cursor != m_string.end() &&
40+
( isdigit( *cursor ) || *cursor == ';' ) ) {
41+
++cursor;
42+
}
43+
if ( cursor == m_string.end() || *cursor != 'm' ) {
44+
break;
45+
}
46+
// 'm' -> 0xff
47+
*cursor = AnsiSkippingString::sentinel;
48+
// if we've read an ansi sequence, set the iterator and
49+
// return to the top of the loop
50+
it = cursor + 1;
51+
}
52+
if ( it != m_string.end() ) {
53+
++m_size;
54+
++it;
55+
}
56+
}
57+
}
58+
59+
// Builds the wrapper from a copy of `text`, then runs the preprocessing
// pass over the stored copy.
AnsiSkippingString::AnsiSkippingString( std::string const& text ):
    m_string( text ) {
    this->preprocessString();
}
63+
64+
// Builds the wrapper by taking ownership of `text` (no copy), then runs
// the preprocessing pass over the stored string.
AnsiSkippingString::AnsiSkippingString( std::string&& text ):
    m_string( CATCH_MOVE( text ) ) {
    this->preprocessString();
}
68+
69+
// Returns an iterator constructed over the underlying string without
// the end tag, i.e. the start-of-text iterator.
AnsiSkippingString::const_iterator AnsiSkippingString::begin() const {
    const_iterator first( m_string );
    return first;
}
72+
73+
// Returns the past-the-end iterator, constructed over the underlying
// string with the EndTag marker.
AnsiSkippingString::const_iterator AnsiSkippingString::end() const {
    const_iterator last( m_string, const_iterator::EndTag{} );
    return last;
}
76+
77+
// Copies the raw text between two iterators into a fresh std::string.
//
// Two adjustments are made on top of a plain range copy:
//  * If `begin` compares equal to this->begin(), the copy starts at the
//    true start of the underlying string, so that escape sequences the
//    begin iterator skipped over are still included in the output.
//  * Every sentinel byte in the copy is turned back into 'm', undoing
//    the marking applied when the string was preprocessed.
std::string AnsiSkippingString::substring( const_iterator begin,
                                           const_iterator end ) const {
    auto rawFirst = begin.m_it;
    if ( begin == this->begin() ) {
        rawFirst = m_string.begin();
    }

    std::string result( rawFirst, end.m_it );
    for ( char& ch : result ) {
        if ( ch == AnsiSkippingString::sentinel ) {
            ch = 'm';
        }
    }
    return result;
}
93+
94+
void AnsiSkippingString::const_iterator::tryParseAnsiEscapes() {
95+
// check if we've landed on an ansi sequence, and if so read through
96+
// it
97+
while ( m_it != m_string->end() && *m_it == '\033' &&
98+
m_it + 1 != m_string->end() && *( m_it + 1 ) == '[' ) {
99+
auto cursor = m_it + 2;
100+
while ( cursor != m_string->end() &&
101+
( isdigit( *cursor ) || *cursor == ';' ) ) {
102+
++cursor;
103+
}
104+
if ( cursor == m_string->end() ||
105+
*cursor != AnsiSkippingString::sentinel ) {
106+
break;
107+
}
108+
// if we've read an ansi sequence, set the iterator and
109+
// return to the top of the loop
110+
m_it = cursor + 1;
111+
}
112+
}
113+
114+
// Moves forward by one character and then steps over any marked escape
// sequences that begin at the new position.
// Precondition (asserted): the iterator is not at the end of the string.
void AnsiSkippingString::const_iterator::advance() {
    assert( m_it != m_string->end() );
    ++m_it;
    tryParseAnsiEscapes();
}
119+
120+
// Moves backward by one character, stepping over any marked escape
// sequences ("\033[" (digits or ';')* sentinel) that end just before
// the current position.
//
// Precondition (asserted): the iterator is not at the raw start of the
// string, and there is an ordinary character before any escape
// sequences being stepped over (i.e. this is not a begin iterator that
// already skipped leading sequences).
//
// A sentinel-valued byte that is NOT the terminator of such a sequence
// is treated as an ordinary character, matching what forward iteration
// does. Without that check the backward scan would search for an
// unrelated '\033', potentially running off the start of the string.
void AnsiSkippingString::const_iterator::unadvance() {
    assert( m_it != m_string->begin() );
    --m_it;
    while ( *m_it == AnsiSkippingString::sentinel ) {
        // Walk back over the parameter bytes of the candidate sequence.
        auto seqStart = m_it;
        while ( seqStart != m_string->begin() &&
                ( isdigit( static_cast<unsigned char>(
                      *( seqStart - 1 ) ) ) ||
                  *( seqStart - 1 ) == ';' ) ) {
            --seqStart;
        }
        // The parameters must be introduced by "\033["; otherwise this
        // sentinel byte is plain text and we are done.
        if ( seqStart - m_string->begin() < 2 ||
             *( seqStart - 1 ) != '[' || *( seqStart - 2 ) != '\033' ) {
            break;
        }
        seqStart -= 2; // now on the '\033'
        // If this fires, we must have been a begin iterator that had
        // skipped over ansi sequences at the start of the string.
        assert( seqStart != m_string->begin() );
        // Land on the character before the sequence and re-check, as
        // several sequences may be back to back.
        m_it = seqStart - 1;
    }
}
137+
138+
// Decides whether the text may be broken immediately before `it`.
// A break is allowed at the end of the text, at the first whitespace
// character after a non-whitespace one, before a "breakable before"
// character, or after a "breakable after" character.
// Unless `it` is the end iterator, the character before `it` may be
// inspected, so `it` must then not be the first character.
static bool isBoundary( AnsiSkippingString const& line,
                        AnsiSkippingString::const_iterator it ) {
    if ( it == line.end() ) {
        return true;
    }
    if ( isWhitespace( *it ) && !isWhitespace( *it.oneBefore() ) ) {
        return true;
    }
    if ( isBreakableBefore( *it ) ) {
        return true;
    }
    return isBreakableAfter( *it.oneBefore() );
}
43146

44147
void Column::const_iterator::calcLength() {
45148
m_addHyphen = false;
46149
m_parsedTo = m_lineStart;
150+
AnsiSkippingString const& current_line = m_column.m_string;
47151

48-
std::string const& current_line = m_column.m_string;
49-
if ( current_line[m_lineStart] == '\n' ) {
50-
++m_parsedTo;
152+
if ( m_parsedTo == current_line.end() ) {
153+
m_lineEnd = m_parsedTo;
154+
return;
51155
}
52156

157+
assert( m_lineStart != current_line.end() );
158+
if ( *m_lineStart == '\n' ) { ++m_parsedTo; }
159+
53160
const auto maxLineLength = m_column.m_width - indentSize();
54-
const auto maxParseTo = std::min(current_line.size(), m_lineStart + maxLineLength);
55-
while ( m_parsedTo < maxParseTo &&
56-
current_line[m_parsedTo] != '\n' ) {
161+
std::size_t lineLength = 0;
162+
while ( m_parsedTo != current_line.end() &&
163+
lineLength < maxLineLength && *m_parsedTo != '\n' ) {
57164
++m_parsedTo;
165+
++lineLength;
58166
}
59167

60168
// If we encountered a newline before the column is filled,
61169
// then we linebreak at the newline and consider this line
62170
// finished.
63-
if ( m_parsedTo < m_lineStart + maxLineLength ) {
64-
m_lineLength = m_parsedTo - m_lineStart;
171+
if ( lineLength < maxLineLength ) {
172+
m_lineEnd = m_parsedTo;
65173
} else {
66174
// Look for a natural linebreak boundary in the column
67175
// (We look from the end, so that the first found boundary is
68176
// the right one)
69-
size_t newLineLength = maxLineLength;
70-
while ( newLineLength > 0 && !isBoundary( current_line, m_lineStart + newLineLength ) ) {
71-
--newLineLength;
177+
m_lineEnd = m_parsedTo;
178+
while ( lineLength > 0 &&
179+
!isBoundary( current_line, m_lineEnd ) ) {
180+
--lineLength;
181+
--m_lineEnd;
72182
}
73-
while ( newLineLength > 0 &&
74-
isWhitespace( current_line[m_lineStart + newLineLength - 1] ) ) {
75-
--newLineLength;
183+
while ( lineLength > 0 &&
184+
isWhitespace( *m_lineEnd.oneBefore() ) ) {
185+
--lineLength;
186+
--m_lineEnd;
76187
}
77188

78-
// If we found one, then that is where we linebreak
79-
if ( newLineLength > 0 ) {
80-
m_lineLength = newLineLength;
81-
} else {
82-
// Otherwise we have to split text with a hyphen
189+
// If we found one, then that is where we linebreak, otherwise
190+
// we have to split text with a hyphen
191+
if ( lineLength == 0 ) {
83192
m_addHyphen = true;
84-
m_lineLength = maxLineLength - 1;
193+
m_lineEnd = m_parsedTo.oneBefore();
85194
}
86195
}
87196
}
88197

89198
size_t Column::const_iterator::indentSize() const {
90-
auto initial =
91-
m_lineStart == 0 ? m_column.m_initialIndent : std::string::npos;
199+
auto initial = m_lineStart == m_column.m_string.begin()
200+
? m_column.m_initialIndent
201+
: std::string::npos;
92202
return initial == std::string::npos ? m_column.m_indent : initial;
93203
}
94204

95-
std::string
96-
Column::const_iterator::addIndentAndSuffix( size_t position,
97-
size_t length ) const {
205+
std::string Column::const_iterator::addIndentAndSuffix(
206+
AnsiSkippingString::const_iterator start,
207+
AnsiSkippingString::const_iterator end ) const {
98208
std::string ret;
99209
const auto desired_indent = indentSize();
100-
ret.reserve( desired_indent + length + m_addHyphen );
210+
// ret.reserve( desired_indent + (end - start) + m_addHyphen );
101211
ret.append( desired_indent, ' ' );
102-
ret.append( m_column.m_string, position, length );
103-
if ( m_addHyphen ) {
104-
ret.push_back( '-' );
105-
}
212+
// ret.append( start, end );
213+
ret += m_column.m_string.substring( start, end );
214+
if ( m_addHyphen ) { ret.push_back( '-' ); }
106215

107216
return ret;
108217
}
109218

110-
Column::const_iterator::const_iterator( Column const& column ): m_column( column ) {
219+
Column::const_iterator::const_iterator( Column const& column ):
220+
m_column( column ),
221+
m_lineStart( column.m_string.begin() ),
222+
m_lineEnd( column.m_string.begin() ),
223+
m_parsedTo( column.m_string.begin() ) {
111224
assert( m_column.m_width > m_column.m_indent );
112225
assert( m_column.m_initialIndent == std::string::npos ||
113226
m_column.m_width > m_column.m_initialIndent );
114227
calcLength();
115-
if ( m_lineLength == 0 ) {
116-
m_lineStart = m_column.m_string.size();
228+
if ( m_lineStart == m_lineEnd ) {
229+
m_lineStart = m_column.m_string.end();
117230
}
118231
}
119232

120233
std::string Column::const_iterator::operator*() const {
121234
assert( m_lineStart <= m_parsedTo );
122-
return addIndentAndSuffix( m_lineStart, m_lineLength );
235+
return addIndentAndSuffix( m_lineStart, m_lineEnd );
123236
}
124237

125238
Column::const_iterator& Column::const_iterator::operator++() {
126-
m_lineStart += m_lineLength;
127-
std::string const& current_line = m_column.m_string;
128-
if ( m_lineStart < current_line.size() && current_line[m_lineStart] == '\n' ) {
129-
m_lineStart += 1;
239+
m_lineStart = m_lineEnd;
240+
AnsiSkippingString const& current_line = m_column.m_string;
241+
if ( m_lineStart != current_line.end() && *m_lineStart == '\n' ) {
242+
m_lineStart++;
130243
} else {
131-
while ( m_lineStart < current_line.size() &&
132-
isWhitespace( current_line[m_lineStart] ) ) {
244+
while ( m_lineStart != current_line.end() &&
245+
isWhitespace( *m_lineStart ) ) {
133246
++m_lineStart;
134247
}
135248
}
136249

137-
if ( m_lineStart != current_line.size() ) {
138-
calcLength();
139-
}
250+
if ( m_lineStart != current_line.end() ) { calcLength(); }
140251
return *this;
141252
}
142253

@@ -233,25 +344,25 @@ namespace Catch {
233344
return os;
234345
}
235346

236-
Columns operator+(Column const& lhs, Column const& rhs) {
347+
Columns operator+( Column const& lhs, Column const& rhs ) {
237348
Columns cols;
238349
cols += lhs;
239350
cols += rhs;
240351
return cols;
241352
}
242-
Columns operator+(Column&& lhs, Column&& rhs) {
353+
Columns operator+( Column&& lhs, Column&& rhs ) {
243354
Columns cols;
244355
cols += CATCH_MOVE( lhs );
245356
cols += CATCH_MOVE( rhs );
246357
return cols;
247358
}
248359

249-
Columns& operator+=(Columns& lhs, Column const& rhs) {
360+
Columns& operator+=( Columns& lhs, Column const& rhs ) {
250361
lhs.m_columns.push_back( rhs );
251362
return lhs;
252363
}
253-
Columns& operator+=(Columns& lhs, Column&& rhs) {
254-
lhs.m_columns.push_back( CATCH_MOVE(rhs) );
364+
Columns& operator+=( Columns& lhs, Column&& rhs ) {
365+
lhs.m_columns.push_back( CATCH_MOVE( rhs ) );
255366
return lhs;
256367
}
257368
Columns operator+( Columns const& lhs, Column const& rhs ) {

0 commit comments

Comments
 (0)