From e6561e00684eb33d66bfbe165e918f8686958ef1 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea
Date: Tue, 27 Aug 2019 00:13:52 +0000
Subject: [PATCH] [clang-scan-deps] Skip UTF-8 BOM in source minimizer

Differential Revision: https://reviews.llvm.org/D66511

llvm-svn: 369993
---
 clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp | 7 +++++++
 .../minimize_source_to_dependency_directives_utf8bom.c | 10 ++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c

diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
index 265a6e44e33d..a350481df9aa 100644
--- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -834,7 +834,14 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
   return lexDefault(Kind, Id.Name, First, End);
 }
 
+static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
+  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
+      First[2] == '\xbf')
+    First += 3;
+}
+
 bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+  skipUTF8ByteOrderMark(First, End);
   while (First != End)
     if (lexPPLine(First, End))
       return true;
diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c b/clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
new file mode 100644
index 000000000000..305442fbd28c
--- /dev/null
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
@@ -0,0 +1,10 @@
+// Test UTF8 BOM at start of file
+// RUN: printf '\xef\xbb\xbf' > %t.c
+// RUN: echo '#ifdef TEST\n' >> %t.c
+// RUN: echo '#include <string>' >> %t.c
+// RUN: echo '#endif' >> %t.c
+// RUN: %clang_cc1 -DTEST -print-dependency-directives-minimized-source %t.c 2>&1 | FileCheck %s
+
+// CHECK: #ifdef TEST
+// CHECK-NEXT: #include <string>
+// CHECK-NEXT: #endif