Skip to content
This repository was archived by the owner on Feb 2, 2021. It is now read-only.

Commit 9cdb3bc

Browse files
committed
Merge pull request #664 from facebook/nekto/utf8_handling
Better handling of invalid UTF8 strings.
2 parents 27caa7d + c9d7dc2 commit 9cdb3bc

File tree

5 files changed

+39
-8
lines changed

5 files changed

+39
-8
lines changed

Common/TaskUtil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ typedef void (^FdOutputLineFeedBlock)(int fd, NSString *);
2222
typedef void (^BlockToRunWhileReading)(void);
2323

2424
NSString *StripAnsi(NSString *inputString);
25+
/**
 * Builds a string from `dataSz` bytes starting at `dataPtr` that are meant to
 * be UTF-8 but may contain broken sequences.
 *
 * The implementation in TaskUtil.m re-encodes the bytes UTF-8 -> UTF-8 through
 * iconv with ICONV_SET_DISCARD_ILSEQ enabled, and appends U+FFFD wherever iconv
 * reports EINVAL for a sequence.
 *
 * @param dataPtr Pointer to the first byte to convert.
 * @param dataSz Number of bytes available at `dataPtr`.
 * @return The string accumulated from the converted bytes.
 */
NSString *StringFromDispatchDataWithBrokenUTF8Encoding(const char *dataPtr, size_t dataSz);
2526

2627
/**
2728
* Returns an array of NSStrings with the contents read from fildes.

Common/TaskUtil.m

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,23 +68,41 @@
6868
return outputString;
6969
}
7070

71-
/**
 * Converts a byte buffer that is meant to hold UTF-8 text, but may contain
 * malformed or truncated sequences, into a string.
 *
 * The bytes are re-encoded UTF-8 -> UTF-8 through iconv with
 * ICONV_SET_DISCARD_ILSEQ enabled. Whenever iconv stops on a sequence it
 * cannot convert (EINVAL or EILSEQ), that sequence is skipped, U+FFFD is
 * appended in its place, and conversion resumes right after it.
 *
 * @param dataPtr Pointer to the first byte to convert. May be NULL only when
 *                `dataSz` is 0.
 * @param dataSz Number of bytes available at `dataPtr`.
 * @return A string with everything that could be converted. Never nil; empty
 *         when there is no input or the scratch buffer cannot be allocated.
 */
NSString *StringFromDispatchDataWithBrokenUTF8Encoding(const char *dataPtr, size_t dataSz)
{
  NSMutableString *outputString = [NSMutableString string];
  if (dataPtr == NULL || dataSz == 0) {
    return outputString;
  }

  iconv_t cd = iconv_open("UTF-8", "UTF-8");
  if (cd == (iconv_t)-1) {
    // No converter available: fall back to a strict decode, which yields nil
    // (and therefore an empty result) when the bytes are not valid UTF-8.
    NSString *strictString = [[NSString alloc] initWithBytes:dataPtr length:dataSz encoding:NSUTF8StringEncoding];
    if (strictString != nil) {
      [outputString appendString:strictString];
    }
    return outputString;
  }

  int one = 1;
  iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &one);

  // UTF-8 -> UTF-8 never produces more bytes than it consumes, so a scratch
  // buffer of the input size is enough for every pass.
  char *outbuf = malloc(dataSz);
  if (outbuf == NULL) {
    iconv_close(cd);
    return outputString;
  }

  char *inbuf = (char *)dataPtr;
  size_t inbytesleft = dataSz;
  while (inbytesleft > 0) {
    size_t bytesToProcess = inbytesleft;
    size_t outbytesleft = bytesToProcess;
    char *outptr = outbuf;

    // iconv() advances inbuf and decrements inbytesleft itself.
    errno = 0;
    size_t iconvResult = iconv(cd, &inbuf, &inbytesleft, &outptr, &outbytesleft);
    // Capture errno right away: the message sends below may overwrite it.
    int iconvErrno = errno;

    size_t outbytesLength = bytesToProcess - outbytesleft;
    if (outbytesLength > 0) {
      // No-copy is safe here: appendString: copies the characters before
      // outbuf is reused by the next pass.
      NSString *string = [[NSString alloc] initWithBytesNoCopy:outbuf length:outbytesLength encoding:NSUTF8StringEncoding freeWhenDone:NO];
      if (string != nil) {
        [outputString appendString:string];
      }
    }

    BOOL stoppedOnBadSequence = (iconvResult == (size_t)-1) && (iconvErrno == EINVAL || iconvErrno == EILSEQ);
    if (stoppedOnBadSequence && inbytesleft > 0) {
      // Skip the offending lead byte and every following continuation byte
      // (10xxxxxx). The remaining-byte check comes first so we never read
      // past the end of the input buffer.
      do {
        inbuf++;
        inbytesleft--;
      } while (inbytesleft > 0 && ((*inbuf) & 0xC0) == 0x80);
      [outputString appendString:@"\uFFFD"];
    }

    if (inbytesleft >= bytesToProcess) {
      // Nothing was consumed in this pass (unexpected iconv failure); stop
      // instead of spinning forever on the same bytes.
      break;
    }
  }

  free(outbuf);
  iconv_close(cd);
  return outputString;
}
89107

90108
static NSArray *LinesFromDispatchData(dispatch_data_t data, BOOL omitNewlineCharacters, BOOL forceUntilTheEnd, size_t *convertedSize)

xctool/xctool-tests/TaskUtilTests.m

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,16 @@ - (void)testLaunchTaskAndFeedOutputLinesToBlockMultibyteUtf8
8080
}
8181
}
8282

83+
/**
 * Converts a fixture file containing broken UTF-8 and compares the result with
 * the repaired fixture, then checks that a plain ASCII string round-trips
 * through the conversion unchanged.
 */
- (void)testConversionToUT8OfBrokenUTF8SequenceOfBytes
{
  // Broken fixture versus its repaired counterpart.
  NSData *brokenBytes = [NSData dataWithContentsOfFile:TEST_DATA @"BrokenUTF8EncodingInFile.txt"];
  NSString *expectedText = [NSString stringWithContentsOfFile:TEST_DATA @"BrokenUTF8EncodingInFile-FIXED.txt"
                                                     encoding:NSUTF8StringEncoding
                                                        error:nil];
  NSString *convertedText = StringFromDispatchDataWithBrokenUTF8Encoding(brokenBytes.bytes, brokenBytes.length);
  XCTAssertEqualObjects(convertedText, expectedText);

  // Well-formed input must come back untouched.
  NSString *plainText = @"qwertyuiopasdfghjk';123^&*()_<>?";
  NSData *plainBytes = [plainText dataUsingEncoding:NSUTF8StringEncoding];
  NSString *roundTrippedText = StringFromDispatchDataWithBrokenUTF8Encoding(plainBytes.bytes, plainBytes.length);
  XCTAssertEqualObjects(roundTrippedText, plainText);
}
94+
8395
@end
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)