From e983501bfd1fd17ac73869a1047ce8b988e0b945 Mon Sep 17 00:00:00 2001 From: Trent Nadeau Date: Tue, 26 Jan 2016 04:45:03 +0000 Subject: [PATCH] Don't error when lexing UTF-8 BOM --- lib/Parse/Lexer.cpp | 10 +++++++++- test/Parse/{BOM.swift => utf16_bom.swift} | 0 test/Parse/utf8_bom.swift | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) rename test/Parse/{BOM.swift => utf16_bom.swift} (100%) create mode 100644 test/Parse/utf8_bom.swift diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 1c338af587d12..94f883290b042 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -177,7 +177,15 @@ Lexer::Lexer(const LangOptions &Options, StringRef contents = SM.extractText(SM.getRangeForBuffer(BufferID)); BufferStart = contents.data(); BufferEnd = contents.data() + contents.size(); - CurPtr = BufferStart; + + // Check for Unicode BOM at start of file (Only UTF-8 BOM supported now). + size_t BOMLength = llvm::StringSwitch<size_t>(contents) + .StartsWith("\xEF\xBB\xBF", 3) + .Default(0); + + // Since the UTF-8 BOM doesn't carry information (UTF-8 has no dependency + // on byte order), throw it away. + CurPtr = BufferStart + BOMLength; // Initialize code completion. if (BufferID == SM.getCodeCompletionBufferID()) { diff --git a/test/Parse/BOM.swift b/test/Parse/utf16_bom.swift similarity index 100% rename from test/Parse/BOM.swift rename to test/Parse/utf16_bom.swift diff --git a/test/Parse/utf8_bom.swift b/test/Parse/utf8_bom.swift new file mode 100644 index 0000000000000..27f997a590f7a --- /dev/null +++ b/test/Parse/utf8_bom.swift @@ -0,0 +1,2 @@ +// RUN: %target-parse-verify-swift +struct UTF8Test {}