diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp
index 1c338af587d12..94f883290b042 100644
--- a/lib/Parse/Lexer.cpp
+++ b/lib/Parse/Lexer.cpp
@@ -177,7 +177,15 @@ Lexer::Lexer(const LangOptions &Options,
   StringRef contents = SM.extractText(SM.getRangeForBuffer(BufferID));
   BufferStart = contents.data();
   BufferEnd = contents.data() + contents.size();
-  CurPtr = BufferStart;
+
+  // Check for Unicode BOM at start of file (Only UTF-8 BOM supported now).
+  size_t BOMLength = llvm::StringSwitch<size_t>(contents)
+    .StartsWith("\xEF\xBB\xBF", 3)
+    .Default(0);
+
+  // Since the UTF-8 BOM doesn't carry information (UTF-8 has no dependency
+  // on byte order), throw it away.
+  CurPtr = BufferStart + BOMLength;
 
   // Initialize code completion.
   if (BufferID == SM.getCodeCompletionBufferID()) {
diff --git a/test/Parse/BOM.swift b/test/Parse/utf16_bom.swift
similarity index 100%
rename from test/Parse/BOM.swift
rename to test/Parse/utf16_bom.swift
diff --git a/test/Parse/utf8_bom.swift b/test/Parse/utf8_bom.swift
new file mode 100644
index 0000000000000..27f997a590f7a
--- /dev/null
+++ b/test/Parse/utf8_bom.swift
@@ -0,0 +1,2 @@
+// RUN: %target-parse-verify-swift
+struct UTF8Test {}