forked from OSchip/llvm-project
Don't warn about Unicode characters in -E mode.
People use the C preprocessor for things other than C files, and some of those files contain Unicode characters. We shouldn't warn about Unicode characters appearing outside of identifiers in this case. There is currently no way for the preprocessor to tell whether it is in -E mode, so I added a new flag, derived from the PreprocessorOutputOptions. This flag is only used by the Unicode warnings for now, but could conceivably be used by other warnings or even behavioral differences later. <rdar://problem/13107323> llvm-svn: 173881
This commit is contained in:
parent
1105821f55
commit
17441589c3
|
@ -25,7 +25,7 @@ public:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PreprocessorOutputOptions() {
|
PreprocessorOutputOptions() {
|
||||||
ShowCPP = 1;
|
ShowCPP = 0;
|
||||||
ShowComments = 0;
|
ShowComments = 0;
|
||||||
ShowLineMarkers = 1;
|
ShowLineMarkers = 1;
|
||||||
ShowMacroComments = 0;
|
ShowMacroComments = 0;
|
||||||
|
|
|
@ -160,6 +160,9 @@ class Preprocessor : public RefCountedBase<Preprocessor> {
|
||||||
/// \brief True if pragmas are enabled.
|
/// \brief True if pragmas are enabled.
|
||||||
bool PragmasEnabled : 1;
|
bool PragmasEnabled : 1;
|
||||||
|
|
||||||
|
/// \brief True if the current build action is a preprocessing action.
|
||||||
|
bool PreprocessedOutput : 1;
|
||||||
|
|
||||||
/// \brief True if we are currently preprocessing a #if or #elif directive
|
/// \brief True if we are currently preprocessing a #if or #elif directive
|
||||||
bool ParsingIfOrElifDirective;
|
bool ParsingIfOrElifDirective;
|
||||||
|
|
||||||
|
@ -474,6 +477,16 @@ public:
|
||||||
return SuppressIncludeNotFoundError;
|
return SuppressIncludeNotFoundError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sets whether the preprocessor is responsible for producing output or if
|
||||||
|
/// it is producing tokens to be consumed by Parse and Sema.
|
||||||
|
void setPreprocessedOutput(bool IsPreprocessedOutput) {
|
||||||
|
PreprocessedOutput = IsPreprocessedOutput;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the preprocessor is responsible for generating output,
|
||||||
|
/// false if it is producing tokens to be consumed by Parse and Sema.
|
||||||
|
bool isPreprocessedOutput() const { return PreprocessedOutput; }
|
||||||
|
|
||||||
/// isCurrentLexer - Return true if we are lexing directly from the specified
|
/// isCurrentLexer - Return true if we are lexing directly from the specified
|
||||||
/// lexer.
|
/// lexer.
|
||||||
bool isCurrentLexer(const PreprocessorLexer *L) const {
|
bool isCurrentLexer(const PreprocessorLexer *L) const {
|
||||||
|
|
|
@ -243,6 +243,8 @@ void CompilerInstance::createPreprocessor() {
|
||||||
|
|
||||||
InitializePreprocessor(*PP, PPOpts, getHeaderSearchOpts(), getFrontendOpts());
|
InitializePreprocessor(*PP, PPOpts, getHeaderSearchOpts(), getFrontendOpts());
|
||||||
|
|
||||||
|
PP->setPreprocessedOutput(getPreprocessorOutputOpts().ShowCPP);
|
||||||
|
|
||||||
// Set up the module path, including the hash for the
|
// Set up the module path, including the hash for the
|
||||||
// module-creation options.
|
// module-creation options.
|
||||||
SmallString<256> SpecificModuleCache(
|
SmallString<256> SpecificModuleCache(
|
||||||
|
|
|
@ -1395,9 +1395,48 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
|
static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
|
||||||
ArgList &Args) {
|
ArgList &Args,
|
||||||
|
frontend::ActionKind Action) {
|
||||||
using namespace options;
|
using namespace options;
|
||||||
Opts.ShowCPP = !Args.hasArg(OPT_dM);
|
|
||||||
|
switch (Action) {
|
||||||
|
case frontend::ASTDeclList:
|
||||||
|
case frontend::ASTDump:
|
||||||
|
case frontend::ASTDumpXML:
|
||||||
|
case frontend::ASTPrint:
|
||||||
|
case frontend::ASTView:
|
||||||
|
case frontend::EmitAssembly:
|
||||||
|
case frontend::EmitBC:
|
||||||
|
case frontend::EmitHTML:
|
||||||
|
case frontend::EmitLLVM:
|
||||||
|
case frontend::EmitLLVMOnly:
|
||||||
|
case frontend::EmitCodeGenOnly:
|
||||||
|
case frontend::EmitObj:
|
||||||
|
case frontend::FixIt:
|
||||||
|
case frontend::GenerateModule:
|
||||||
|
case frontend::GeneratePCH:
|
||||||
|
case frontend::GeneratePTH:
|
||||||
|
case frontend::ParseSyntaxOnly:
|
||||||
|
case frontend::PluginAction:
|
||||||
|
case frontend::PrintDeclContext:
|
||||||
|
case frontend::RewriteObjC:
|
||||||
|
case frontend::RewriteTest:
|
||||||
|
case frontend::RunAnalysis:
|
||||||
|
case frontend::MigrateSource:
|
||||||
|
Opts.ShowCPP = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case frontend::DumpRawTokens:
|
||||||
|
case frontend::DumpTokens:
|
||||||
|
case frontend::InitOnly:
|
||||||
|
case frontend::PrintPreamble:
|
||||||
|
case frontend::PrintPreprocessedInput:
|
||||||
|
case frontend::RewriteMacros:
|
||||||
|
case frontend::RunPreprocessorOnly:
|
||||||
|
Opts.ShowCPP = !Args.hasArg(OPT_dM);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
Opts.ShowComments = Args.hasArg(OPT_C);
|
Opts.ShowComments = Args.hasArg(OPT_C);
|
||||||
Opts.ShowLineMarkers = !Args.hasArg(OPT_P);
|
Opts.ShowLineMarkers = !Args.hasArg(OPT_P);
|
||||||
Opts.ShowMacroComments = Args.hasArg(OPT_CC);
|
Opts.ShowMacroComments = Args.hasArg(OPT_CC);
|
||||||
|
@ -1478,7 +1517,8 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
|
||||||
// parameters from the function and the "FileManager.h" #include.
|
// parameters from the function and the "FileManager.h" #include.
|
||||||
FileManager FileMgr(Res.getFileSystemOpts());
|
FileManager FileMgr(Res.getFileSystemOpts());
|
||||||
ParsePreprocessorArgs(Res.getPreprocessorOpts(), *Args, FileMgr, Diags);
|
ParsePreprocessorArgs(Res.getPreprocessorOpts(), *Args, FileMgr, Diags);
|
||||||
ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args);
|
ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args,
|
||||||
|
Res.getFrontendOpts().ProgramAction);
|
||||||
ParseTargetArgs(Res.getTargetOpts(), *Args);
|
ParseTargetArgs(Res.getTargetOpts(), *Args);
|
||||||
|
|
||||||
return Success;
|
return Success;
|
||||||
|
|
|
@ -2811,14 +2811,13 @@ static bool isUnicodeWhitespace(uint32_t C) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
|
void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
|
||||||
if (isUnicodeWhitespace(C)) {
|
if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
|
||||||
if (!isLexingRawMode()) {
|
isUnicodeWhitespace(C)) {
|
||||||
CharSourceRange CharRange =
|
CharSourceRange CharRange =
|
||||||
CharSourceRange::getCharRange(getSourceLocation(),
|
CharSourceRange::getCharRange(getSourceLocation(),
|
||||||
getSourceLocation(CurPtr));
|
getSourceLocation(CurPtr));
|
||||||
Diag(BufferPtr, diag::ext_unicode_whitespace)
|
Diag(BufferPtr, diag::ext_unicode_whitespace)
|
||||||
<< CharRange;
|
<< CharRange;
|
||||||
}
|
|
||||||
|
|
||||||
Result.setFlag(Token::LeadingSpace);
|
Result.setFlag(Token::LeadingSpace);
|
||||||
if (SkipWhitespace(Result, CurPtr))
|
if (SkipWhitespace(Result, CurPtr))
|
||||||
|
@ -2832,7 +2831,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
|
||||||
return LexIdentifier(Result, CurPtr);
|
return LexIdentifier(Result, CurPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
|
if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
|
||||||
|
!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
|
||||||
// Non-ASCII characters tend to creep into source code unintentionally.
|
// Non-ASCII characters tend to creep into source code unintentionally.
|
||||||
// Instead of letting the parser complain about the unknown token,
|
// Instead of letting the parser complain about the unknown token,
|
||||||
// just drop the character.
|
// just drop the character.
|
||||||
|
@ -2842,13 +2842,11 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
|
||||||
// loophole in the mapping of Unicode characters to basic character set
|
// loophole in the mapping of Unicode characters to basic character set
|
||||||
// characters that allows us to map these particular characters to, say,
|
// characters that allows us to map these particular characters to, say,
|
||||||
// whitespace.
|
// whitespace.
|
||||||
if (!isLexingRawMode()) {
|
CharSourceRange CharRange =
|
||||||
CharSourceRange CharRange =
|
CharSourceRange::getCharRange(getSourceLocation(),
|
||||||
CharSourceRange::getCharRange(getSourceLocation(),
|
getSourceLocation(CurPtr));
|
||||||
getSourceLocation(CurPtr));
|
Diag(BufferPtr, diag::err_non_ascii)
|
||||||
Diag(BufferPtr, diag::err_non_ascii)
|
<< FixItHint::CreateRemoval(CharRange);
|
||||||
<< FixItHint::CreateRemoval(CharRange);
|
|
||||||
}
|
|
||||||
|
|
||||||
BufferPtr = CurPtr;
|
BufferPtr = CurPtr;
|
||||||
return LexTokenInternal(Result);
|
return LexTokenInternal(Result);
|
||||||
|
@ -3537,11 +3535,15 @@ LexNextToken:
|
||||||
if (Status == conversionOK)
|
if (Status == conversionOK)
|
||||||
return LexUnicode(Result, CodePoint, CurPtr);
|
return LexUnicode(Result, CodePoint, CurPtr);
|
||||||
|
|
||||||
|
if (isLexingRawMode() || PP->isPreprocessedOutput()) {
|
||||||
|
Kind = tok::unknown;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Non-ASCII characters tend to creep into source code unintentionally.
|
// Non-ASCII characters tend to creep into source code unintentionally.
|
||||||
// Instead of letting the parser complain about the unknown token,
|
// Instead of letting the parser complain about the unknown token,
|
||||||
// just diagnose the invalid UTF-8, then drop the character.
|
// just diagnose the invalid UTF-8, then drop the character.
|
||||||
if (!isLexingRawMode())
|
Diag(CurPtr, diag::err_invalid_utf8);
|
||||||
Diag(CurPtr, diag::err_invalid_utf8);
|
|
||||||
|
|
||||||
BufferPtr = CurPtr+1;
|
BufferPtr = CurPtr+1;
|
||||||
goto LexNextToken;
|
goto LexNextToken;
|
||||||
|
|
|
@ -1,6 +1,15 @@
|
||||||
// RUN: %clang_cc1 -fsyntax-only -verify %s
|
// RUN: %clang_cc1 -fsyntax-only -verify %s
|
||||||
|
// RUN: %clang_cc1 -E -DPP_ONLY=1 %s -o %t
|
||||||
|
// RUN: FileCheck --strict-whitespace --input-file=%t %s
|
||||||
|
|
||||||
// This file contains Unicode characters; please do not "fix" them!
|
// This file contains Unicode characters; please do not "fix" them!
|
||||||
|
|
||||||
extern int x; // expected-warning {{treating Unicode character as whitespace}}
|
extern int x; // expected-warning {{treating Unicode character as whitespace}}
|
||||||
extern int x; // expected-warning {{treating Unicode character as whitespace}}
|
extern int x; // expected-warning {{treating Unicode character as whitespace}}
|
||||||
|
|
||||||
|
// CHECK: extern int {{x}}
|
||||||
|
// CHECK: extern int {{x}}
|
||||||
|
|
||||||
|
#if PP_ONLY
|
||||||
|
CHECK: The preprocessor should not complain about Unicode characters like ©.
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue