[Preprocessor] Implement -fminimize-whitespace.

This patch adds the -fminimize-whitespace option with the following effects:

 * If combined with -E, remove as much non-line-breaking whitespace as
   possible.

 * If combined with -E -P, remove as much whitespace as possible,
   including line-breaks.

The motivation is to reduce the amount of insignificant changes in the
preprocessed output with source files where only whitespace has been
changed (add/remove comments, clang-format, etc.) which is in particular
useful with ccache.

A patch for using this flag has been proposed to ccache as well:
https://github.com/ccache/ccache/pull/815, which will use
-fminimize-whitespace when clang-13 has been detected, and additionally
uses -P in "unify_mode". ccache already had a unify_mode in an older
version which was removed because of problems that using the
preprocessor itself does not have (such as the custom tokenizer not
recognizing C++11 raw strings).

This patch slightly reorganizes which part is responsible for adding
newlines that are required for semantics. It is now either
startNewLineIfNeeded() or MoveToLine() but never both; this avoids the
ShouldUpdateCurrentLine workaround and avoids redundant lines being
inserted in some cases. It also fixes a mandatory newline not inserted
after a _Pragma("...") that is expanded into a #pragma.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D104601
This commit is contained in:
Michael Kruse 2021-07-25 21:39:08 -05:00
parent 1558bb80c0
commit ae6b400002
19 changed files with 391 additions and 140 deletions

View File

@ -2475,6 +2475,16 @@ Turn on loop unroller
Use #line in preprocessed output
.. option:: -fminimize-whitespace, -fno-minimize-whitespace
Ignore the whitespace from the input file when emitting preprocessor
output. It will only contain whitespace when necessary, e.g. to keep two
minus signs from merging into a decrement operator. Useful with the
-P option to normalize whitespace such that two files with only formatting
changes are equal.
Only valid with -E on C-like inputs and incompatible with -traditional-cpp.
.. option:: -fvalidate-ast-input-files-content
Compute and store the hash of input files used to build an AST. Files with mismatching mtimes are considered valid if their contents are identical

View File

@ -129,6 +129,8 @@ def err_drv_invalid_Xopenmp_target_with_args : Error<
"invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">;
def err_drv_argument_only_allowed_with : Error<
"invalid argument '%0' only allowed with '%1'">;
def err_drv_minws_unsupported_input_type : Error<
"'-fminimize-whitespace' invalid for input of type %0">;
def err_drv_amdgpu_ieee_without_no_honor_nans : Error<
"invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">;
def err_drv_argument_not_allowed_with : Error<

View File

@ -1799,6 +1799,9 @@ def frewrite_map_file_EQ : Joined<["-"], "frewrite-map-file=">,
defm use_line_directives : BoolFOption<"use-line-directives",
PreprocessorOutputOpts<"UseLineDirectives">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Use #line in preprocessed output">, NegFlag<SetFalse>>;
defm minimize_whitespace : BoolFOption<"minimize-whitespace",
PreprocessorOutputOpts<"MinimizeWhitespace">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Minimize whitespace when emitting preprocessor output">, NegFlag<SetFalse>>;
def ffreestanding : Flag<["-"], "ffreestanding">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Assert that the compilation takes place in a freestanding environment">,

View File

@ -66,6 +66,14 @@ namespace types {
/// isAcceptedByClang - Can clang handle this input type.
bool isAcceptedByClang(ID Id);
/// isDerivedFromC - Is the input derived from C.
///
/// That is, does the lexer follow the rules of
/// TokenConcatenation::AvoidConcat. If this is the case, the preprocessor may
/// add and remove whitespace between tokens. Used to determine whether the
/// input can be processed by -fminimize-whitespace.
bool isDerivedFromC(ID Id);
/// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and headers).
bool isCXX(ID Id);

View File

@ -24,6 +24,7 @@ public:
unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output.
unsigned RewriteIncludes : 1; ///< Preprocess include directives only.
unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules.
unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input.
public:
PreprocessorOutputOptions() {
@ -36,6 +37,7 @@ public:
ShowIncludeDirectives = 0;
RewriteIncludes = 0;
RewriteImports = 0;
MinimizeWhitespace = 0;
}
};

View File

@ -52,8 +52,9 @@ using namespace clang;
using namespace llvm::opt;
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
if (Arg *A =
Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC)) {
if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC,
options::OPT_fminimize_whitespace,
options::OPT_fno_minimize_whitespace)) {
if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
!Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
@ -6067,6 +6068,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_use_line_directives, false))
CmdArgs.push_back("-fuse-line-directives");
// -fno-minimize-whitespace is default.
if (Args.hasFlag(options::OPT_fminimize_whitespace,
options::OPT_fno_minimize_whitespace, false)) {
types::ID InputType = Inputs[0].getType();
if (!isDerivedFromC(InputType))
D.Diag(diag::err_drv_minws_unsupported_input_type)
<< types::getTypeName(InputType);
CmdArgs.push_back("-fminimize-whitespace");
}
// -fms-extensions=0 is default.
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
IsWindowsMSVC))

View File

@ -147,6 +147,45 @@ bool types::isAcceptedByClang(ID Id) {
}
}
/// Report whether input type \p Id is derived from C, i.e. whether its lexer
/// follows the rules of TokenConcatenation::AvoidConcat so that the
/// preprocessor may add and remove whitespace between tokens. Every type not
/// listed explicitly below is treated as not derived from C.
bool types::isDerivedFromC(ID Id) {
  switch (Id) {
  // Source inputs, raw and already preprocessed.
  case TY_C:
  case TY_PP_C:
  case TY_CL:
  case TY_CLCXX:
  case TY_CUDA:
  case TY_PP_CUDA:
  case TY_CUDA_DEVICE:
  case TY_HIP:
  case TY_PP_HIP:
  case TY_HIP_DEVICE:
  case TY_ObjC:
  case TY_PP_ObjC:
  case TY_PP_ObjC_Alias:
  case TY_CXX:
  case TY_PP_CXX:
  case TY_ObjCXX:
  case TY_PP_ObjCXX:
  case TY_PP_ObjCXX_Alias:
  case TY_RenderScript:
  // Header inputs, raw and already preprocessed.
  case TY_CHeader:
  case TY_PP_CHeader:
  case TY_CLHeader:
  case TY_ObjCHeader:
  case TY_PP_ObjCHeader:
  case TY_CXXHeader:
  case TY_PP_CXXHeader:
  case TY_ObjCXXHeader:
  case TY_PP_ObjCXXHeader:
  // Module inputs.
  case TY_CXXModule:
  case TY_PP_CXXModule:
    return true;

  default:
    return false;
  }
}
bool types::isObjC(ID Id) {
switch (Id) {
default:

View File

@ -95,14 +95,20 @@ private:
bool DumpIncludeDirectives;
bool UseLineDirectives;
bool IsFirstFileEntered;
bool MinimizeWhitespace;
Token PrevTok;
Token PrevPrevTok;
public:
PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
bool defines, bool DumpIncludeDirectives,
bool UseLineDirectives)
bool UseLineDirectives, bool MinimizeWhitespace)
: PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
DisableLineMarkers(lineMarkers), DumpDefines(defines),
DumpIncludeDirectives(DumpIncludeDirectives),
UseLineDirectives(UseLineDirectives) {
UseLineDirectives(UseLineDirectives),
MinimizeWhitespace(MinimizeWhitespace) {
CurLine = 0;
CurFilename += "<uninit>";
EmittedTokensOnThisLine = false;
@ -110,8 +116,13 @@ public:
FileType = SrcMgr::C_User;
Initialized = false;
IsFirstFileEntered = false;
PrevTok.startToken();
PrevPrevTok.startToken();
}
bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
@ -120,7 +131,12 @@ public:
return EmittedDirectiveOnThisLine;
}
bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
/// Ensure that the output stream position is at the beginning of a new line
/// and insert a newline if it is not. It is intended to ensure that
/// directives that are inserted and do not come from the input source (such
/// as #line) are in the first column. To insert newlines that represent the
/// input, use MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
void startNewLineIfNeeded();
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
@ -148,18 +164,45 @@ public:
void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
bool HandleFirstTokOnLine(Token &Tok);
/// Insert whitespace before emitting the next token.
///
/// @param Tok Next token to be emitted.
/// @param RequireSpace Ensure at least one whitespace is emitted. Useful
/// if non-tokens have been emitted to the stream.
/// @param RequireSameLine Never emit newlines. Useful when semantics depend
/// on being on the same line, such as directives.
void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace,
bool RequireSameLine);
/// Move to the line of the provided source location. This will
/// return true if the output stream required adjustment or if
/// the requested location is on the first line.
bool MoveToLine(SourceLocation Loc) {
/// return true if a newline was inserted or if
/// the requested location is the first token on the first line.
/// In these cases the next output will be in the first column of the line,
/// which makes it possible to insert indentation. At the beginning of the
/// file the newline is considered to have been inserted implicitly.
///
/// @param Tok Token where to move to.
/// @param RequireStartOfLine Whether the next line depends on being in the
/// first column, such as a directive.
///
/// @return Whether column adjustments are necessary.
bool MoveToLine(const Token &Tok, bool RequireStartOfLine) {
PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation());
if (PLoc.isInvalid())
return false;
bool IsFirstInFile = Tok.isAtStartOfLine() && PLoc.getLine() == 1;
return MoveToLine(PLoc.getLine(), RequireStartOfLine) || IsFirstInFile;
}
/// Move to the line of the provided source location. Returns true if a new
/// line was inserted.
bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) {
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
if (PLoc.isInvalid())
return false;
return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
return MoveToLine(PLoc.getLine(), RequireStartOfLine);
}
bool MoveToLine(unsigned LineNo);
bool MoveToLine(unsigned LineNo, bool RequireStartOfLine);
bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
const Token &Tok) {
@ -187,7 +230,7 @@ public:
void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
const char *Extra,
unsigned ExtraLen) {
startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
startNewLineIfNeeded();
// Emit #line directives or GNU line markers depending on what mode we're in.
if (UseLineDirectives) {
@ -214,43 +257,57 @@ void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
/// object. We can do this by emitting some number of \n's, or by emitting a
/// #line directive. This returns false if already at the specified line, true
/// if some newlines were emitted.
bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
bool RequireStartOfLine) {
// If it is required to start a new line or finish the current, insert
// vertical whitespace now and take it into account when moving to the
// expected line.
bool StartedNewLine = false;
if ((RequireStartOfLine && EmittedTokensOnThisLine) ||
EmittedDirectiveOnThisLine) {
OS << '\n';
StartedNewLine = true;
CurLine += 1;
EmittedTokensOnThisLine = false;
EmittedDirectiveOnThisLine = false;
}
// If this line is "close enough" to the original line, just print newlines,
// otherwise print a #line directive.
if (LineNo-CurLine <= 8) {
if (LineNo-CurLine == 1)
OS << '\n';
else if (LineNo == CurLine)
return false; // Spelling line moved, but expansion line didn't.
else {
const char *NewLines = "\n\n\n\n\n\n\n\n";
OS.write(NewLines, LineNo-CurLine);
}
if (CurLine == LineNo) {
// Nothing to do if we are already on the correct line.
} else if (!StartedNewLine && (!MinimizeWhitespace || !DisableLineMarkers) &&
LineNo - CurLine == 1) {
// Printing a single line has priority over printing a #line directive, even
// when minimizing whitespace which otherwise would print #line directives
// for every single line.
OS << '\n';
StartedNewLine = true;
} else if (!MinimizeWhitespace && LineNo - CurLine <= 8) {
const char *NewLines = "\n\n\n\n\n\n\n\n";
OS.write(NewLines, LineNo - CurLine);
StartedNewLine = true;
} else if (!DisableLineMarkers) {
// Emit a #line or line marker.
WriteLineInfo(LineNo, nullptr, 0);
} else {
// Okay, we're in -P mode, which turns off line markers. However, we still
// need to emit a newline between tokens on different lines.
startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
StartedNewLine = true;
}
if (StartedNewLine) {
EmittedTokensOnThisLine = false;
EmittedDirectiveOnThisLine = false;
}
CurLine = LineNo;
return true;
return StartedNewLine;
}
bool
PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
OS << '\n';
EmittedTokensOnThisLine = false;
EmittedDirectiveOnThisLine = false;
if (ShouldUpdateCurrentLine)
++CurLine;
return true;
}
return false;
}
/// FileChanged - Whenever the preprocessor enters or exits a #include file
@ -273,7 +330,7 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
if (Reason == PPCallbacks::EnterFile) {
SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
if (IncludeLoc.isValid())
MoveToLine(IncludeLoc);
MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false);
} else if (Reason == PPCallbacks::SystemHeaderPragma) {
// GCC emits the # directive for this directive on the line AFTER the
// directive and emits a bunch of spaces that aren't needed. This is because
@ -290,7 +347,8 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
FileType = NewFileType;
if (DisableLineMarkers) {
startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
if (!MinimizeWhitespace)
startNewLineIfNeeded();
return;
}
@ -336,15 +394,13 @@ void PrintPPOutputPPCallbacks::InclusionDirective(
// In -dI mode, dump #include directives prior to dumping their content or
// interpretation.
if (DumpIncludeDirectives) {
startNewLineIfNeeded();
MoveToLine(HashLoc);
MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
const std::string TokenText = PP.getSpelling(IncludeTok);
assert(!TokenText.empty());
OS << "#" << TokenText << " "
<< (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
<< " /* clang -E -dI */";
setEmittedDirectiveOnThisLine();
startNewLineIfNeeded();
}
// When preprocessing, turn implicit imports into module import pragmas.
@ -353,17 +409,13 @@ void PrintPPOutputPPCallbacks::InclusionDirective(
case tok::pp_include:
case tok::pp_import:
case tok::pp_include_next:
startNewLineIfNeeded();
MoveToLine(HashLoc);
MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
OS << "#pragma clang module import " << Imported->getFullModuleName(true)
<< " /* clang -E: implicit import for "
<< "#" << PP.getSpelling(IncludeTok) << " "
<< (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
<< " */";
// Since we want a newline after the pragma, but not a #<line>, start a
// new line immediately.
EmittedTokensOnThisLine = true;
startNewLineIfNeeded();
setEmittedDirectiveOnThisLine();
break;
case tok::pp___include_macros:
@ -398,11 +450,11 @@ void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
/// Ident - Handle #ident directives when read by the preprocessor.
///
void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS.write("#ident ", strlen("#ident "));
OS.write(S.begin(), S.size());
EmittedTokensOnThisLine = true;
setEmittedTokensOnThisLine();
}
/// MacroDefined - This hook is called whenever a macro definition is seen.
@ -414,7 +466,7 @@ void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
// Ignore __FILE__ etc.
MI->isBuiltinMacro()) return;
MoveToLine(MI->getDefinitionLoc());
MoveToLine(MI->getDefinitionLoc(), /*RequireStartOfLine=*/true);
PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
setEmittedDirectiveOnThisLine();
}
@ -425,7 +477,7 @@ void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
// Only print out macro definitions in -dD mode.
if (!DumpDefines) return;
MoveToLine(MacroNameTok.getLocation());
MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true);
OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
setEmittedDirectiveOnThisLine();
}
@ -446,8 +498,7 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
StringRef Namespace,
PragmaMessageKind Kind,
StringRef Str) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma ";
if (!Namespace.empty())
OS << Namespace << ' ';
@ -472,8 +523,7 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
StringRef DebugType) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma clang __debug ";
OS << DebugType;
@ -483,16 +533,14 @@ void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
void PrintPPOutputPPCallbacks::
PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma " << Namespace << " diagnostic push";
setEmittedDirectiveOnThisLine();
}
void PrintPPOutputPPCallbacks::
PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma " << Namespace << " diagnostic pop";
setEmittedDirectiveOnThisLine();
}
@ -501,8 +549,7 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
StringRef Namespace,
diag::Severity Map,
StringRef Str) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma " << Namespace << " diagnostic ";
switch (Map) {
case diag::Severity::Remark:
@ -528,8 +575,7 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
StringRef WarningSpec,
ArrayRef<int> Ids) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma warning(" << WarningSpec << ':';
for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
OS << ' ' << *I;
@ -539,8 +585,7 @@ void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
int Level) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma warning(push";
if (Level >= 0)
OS << ", " << Level;
@ -549,16 +594,14 @@ void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
}
void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma warning(pop)";
setEmittedDirectiveOnThisLine();
}
void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
StringRef Str) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma character_execution_set(push";
if (!Str.empty())
OS << ", " << Str;
@ -567,64 +610,80 @@ void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
}
void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma character_execution_set(pop)";
setEmittedDirectiveOnThisLine();
}
void PrintPPOutputPPCallbacks::
PragmaAssumeNonNullBegin(SourceLocation Loc) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma clang assume_nonnull begin";
setEmittedDirectiveOnThisLine();
}
void PrintPPOutputPPCallbacks::
PragmaAssumeNonNullEnd(SourceLocation Loc) {
startNewLineIfNeeded();
MoveToLine(Loc);
MoveToLine(Loc, /*RequireStartOfLine=*/true);
OS << "#pragma clang assume_nonnull end";
setEmittedDirectiveOnThisLine();
}
/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
/// is called for the first token on each new line. If this really is the start
/// of a new logical line, handle it and return true, otherwise return false.
/// This may not be the start of a logical line because the "start of line"
/// marker is set for spelling lines, not expansion ones.
bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
// Figure out what line we went to and insert the appropriate number of
// newline characters.
if (!MoveToLine(Tok.getLocation()))
return false;
void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
bool RequireSpace,
bool RequireSameLine) {
// These tokens are not expanded to anything and don't need whitespace before
// them.
if (Tok.is(tok::eof) ||
(Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
!Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end)))
return;
// Print out space characters so that the first token on a line is
// indented for easy reading.
unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
if (!RequireSameLine && MoveToLine(Tok, /*RequireStartOfLine=*/false)) {
if (MinimizeWhitespace) {
// Avoid interpreting hash as a directive under -fpreprocessed.
if (Tok.is(tok::hash))
OS << ' ';
} else {
// Print out space characters so that the first token on a line is
// indented for easy reading.
unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
// The first token on a line can have a column number of 1, yet still expect
// leading white space, if a macro expansion in column 1 starts with an empty
// macro argument, or an empty nested macro expansion. In this case, move the
// token to column 2.
if (ColNo == 1 && Tok.hasLeadingSpace())
ColNo = 2;
// The first token on a line can have a column number of 1, yet still
// expect leading white space, if a macro expansion in column 1 starts
// with an empty macro argument, or an empty nested macro expansion. In
// this case, move the token to column 2.
if (ColNo == 1 && Tok.hasLeadingSpace())
ColNo = 2;
// This hack prevents stuff like:
// #define HASH #
// HASH define foo bar
// From having the # character end up at column 1, which makes it so it
// is not handled as a #define next time through the preprocessor if in
// -fpreprocessed mode.
if (ColNo <= 1 && Tok.is(tok::hash))
OS << ' ';
// This hack prevents stuff like:
// #define HASH #
// HASH define foo bar
// From having the # character end up at column 1, which makes it so it
// is not handled as a #define next time through the preprocessor if in
// -fpreprocessed mode.
if (ColNo <= 1 && Tok.is(tok::hash))
OS << ' ';
// Otherwise, indent the appropriate number of spaces.
for (; ColNo > 1; --ColNo)
OS << ' ';
// Otherwise, indent the appropriate number of spaces.
for (; ColNo > 1; --ColNo)
OS << ' ';
}
} else {
  // We stay on the current output line. Insert a single space between the
  // previous and the next token if any of the following holds:
  // - The caller requires it.
  // - The input had whitespace between them and we are not in
  //   whitespace-minimization mode.
  // - Something (a token or a directive) has already been emitted on this
  //   line, so there is no newline separating the tokens, and the whitespace
  //   is necessary to keep the tokens apart.
  if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
      ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
       AvoidConcat(PrevPrevTok, PrevTok, Tok)))
    OS << ' ';
}
return true;
PrevPrevTok = PrevTok;
PrevTok = Tok;
}
void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
@ -668,9 +727,9 @@ struct UnknownPragmaHandler : public PragmaHandler {
Token &PragmaTok) override {
// Figure out what line we went to and insert the appropriate number of
// newline characters.
Callbacks->startNewLineIfNeeded();
Callbacks->MoveToLine(PragmaTok.getLocation());
Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true);
Callbacks->OS.write(Prefix, strlen(Prefix));
Callbacks->setEmittedTokensOnThisLine();
if (ShouldExpandTokens) {
// The first token does not have expanded macros. Expand them, if
@ -682,21 +741,16 @@ struct UnknownPragmaHandler : public PragmaHandler {
/*IsReinject=*/false);
PP.Lex(PragmaTok);
}
Token PrevToken;
Token PrevPrevToken;
PrevToken.startToken();
PrevPrevToken.startToken();
// Read and print all of the pragma tokens.
bool IsFirst = true;
while (PragmaTok.isNot(tok::eod)) {
if (PragmaTok.hasLeadingSpace() ||
Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
Callbacks->OS << ' ';
Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst,
/*RequireSameLine=*/true);
IsFirst = false;
std::string TokSpell = PP.getSpelling(PragmaTok);
Callbacks->OS.write(&TokSpell[0], TokSpell.size());
PrevPrevToken = PrevToken;
PrevToken = PragmaTok;
Callbacks->setEmittedTokensOnThisLine();
if (ShouldExpandTokens)
PP.Lex(PragmaTok);
@ -715,44 +769,41 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
bool DropComments = PP.getLangOpts().TraditionalCPP &&
!PP.getCommentRetentionState();
bool IsStartOfLine = false;
char Buffer[256];
Token PrevPrevTok, PrevTok;
PrevPrevTok.startToken();
PrevTok.startToken();
while (1) {
if (Callbacks->hasEmittedDirectiveOnThisLine()) {
Callbacks->startNewLineIfNeeded();
Callbacks->MoveToLine(Tok.getLocation());
}
// Two lines joined with line continuation ('\' as last character on the
// line) must be emitted as one line even though Tok.getLine() returns two
// different values. In this situation Tok.isAtStartOfLine() is false even
// though it may be the first token on the lexical line. When
// dropping/skipping a token that is at the start of a line, propagate the
// start-of-line-ness to the next token to not append it to the previous
// line.
IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine();
// If this token is at the start of a line, emit newlines if needed.
if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
// done.
} else if (Tok.hasLeadingSpace() ||
// If we haven't emitted a token on this line yet, PrevTok isn't
// useful to look at and no concatenation could happen anyway.
(Callbacks->hasEmittedTokensOnThisLine() &&
// Don't print "-" next to "-", it would form "--".
Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
OS << ' ';
}
Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false,
/*RequireSameLine=*/!IsStartOfLine);
if (DropComments && Tok.is(tok::comment)) {
// Skip comments. Normally the preprocessor does not generate
// tok::comment nodes at all when not keeping comments, but under
// -traditional-cpp the lexer keeps /all/ whitespace, including comments.
SourceLocation StartLoc = Tok.getLocation();
Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
PP.Lex(Tok);
continue;
} else if (Tok.is(tok::eod)) {
// Don't print end of directive tokens, since they are typically newlines
// that mess up our line tracking. These come from unknown pre-processor
// directives or hash-prefixed comments in standalone assembly files.
PP.Lex(Tok);
// FIXME: The token on the next line after #include should have
// Tok.isAtStartOfLine() set.
IsStartOfLine = true;
continue;
} else if (Tok.is(tok::annot_module_include)) {
// PrintPPOutputPPCallbacks::InclusionDirective handles producing
// appropriate output here. Ignore this token entirely.
PP.Lex(Tok);
IsStartOfLine = true;
continue;
} else if (Tok.is(tok::annot_module_begin)) {
// FIXME: We retrieve this token after the FileChanged callback, and
@ -764,11 +815,13 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
Callbacks->BeginModule(
reinterpret_cast<Module *>(Tok.getAnnotationValue()));
PP.Lex(Tok);
IsStartOfLine = true;
continue;
} else if (Tok.is(tok::annot_module_end)) {
Callbacks->EndModule(
reinterpret_cast<Module *>(Tok.getAnnotationValue()));
PP.Lex(Tok);
IsStartOfLine = true;
continue;
} else if (Tok.is(tok::annot_header_unit)) {
// This is a header-name that has been (effectively) converted into a
@ -796,8 +849,17 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
// Tokens that can contain embedded newlines need to adjust our current
// line number.
// FIXME: The token may end with a newline in which case
// setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
// wrong.
if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
Callbacks->HandleNewlinesInToken(TokPtr, Len);
if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' &&
TokPtr[1] == '/') {
// It's a line comment;
// Ensure that we don't concatenate anything behind it.
Callbacks->setEmittedDirectiveOnThisLine();
}
} else {
std::string S = PP.getSpelling(Tok);
OS.write(S.data(), S.size());
@ -806,13 +868,17 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
// line number.
if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
Callbacks->HandleNewlinesInToken(S.data(), S.size());
if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') {
// It's a line comment;
// Ensure that we don't concatenate anything behind it.
Callbacks->setEmittedDirectiveOnThisLine();
}
}
Callbacks->setEmittedTokensOnThisLine();
IsStartOfLine = false;
if (Tok.is(tok::eof)) break;
PrevPrevTok = PrevTok;
PrevTok = Tok;
PP.Lex(Tok);
}
}
@ -870,7 +936,8 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
Opts.ShowIncludeDirectives, Opts.UseLineDirectives);
Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
Opts.MinimizeWhitespace);
// Expand macros in pragmas with -fms-extensions. The assumption is that
// the majority of pragmas in such a file will be Microsoft pragmas.

View File

@ -716,6 +716,12 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
}
// Update the token info (identifier info and appropriate token kind).
// FIXME: the raw_identifier may contain leading whitespace which is removed
// from the cleaned identifier token. The SourceLocation should be updated to
// refer to the non-whitespace character. For instance, the text "\\\nB" (a
// line continuation before 'B') is parsed as a single tok::raw_identifier and
// is cleaned to tok::identifier "B". After cleaning the token's length is
// still 3 and the SourceLocation refers to the location of the backslash.
Identifier.setIdentifierInfo(II);
if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
getSourceManager().isInSystemHeader(Identifier.getLocation()))

View File

@ -1,4 +1,5 @@
// RUN: %clang_cc1 -E -C %s | FileCheck -strict-whitespace %s
// RUN: %clang_cc1 -E -C -fminimize-whitespace %s | FileCheck -strict-whitespace %s
// foo
// CHECK: // foo

View File

@ -1,7 +1,14 @@
foo
// RUN: %clang_cc1 -E %s | FileCheck -strict-whitespace %s
// RUN: %clang_cc1 -E -fminimize-whitespace %s | FileCheck -strict-whitespace %s --check-prefix=MINCOL
// RUN: %clang_cc1 -E -fminimize-whitespace -P %s | FileCheck -strict-whitespace %s --check-prefix=MINWS
bar
// CHECK: {{^ }}foo
// CHECK: {{^ }}bar
// MINCOL: {{^}}foo
// MINCOL: {{^}}bar
// MINWS: {{^}}foo bar

View File

@ -4,6 +4,10 @@
// CHECK-NEXT: {{^ #$}}
// CHECK-NEXT: {{^2$}}
// CHECK-NEXT: {{^ #$}}
// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck --strict-whitespace %s --check-prefix=MINWS
// MINWS: {{^}}1#2#{{$}}
#define EMPTY
#define IDENTITY(X) X
1

View File

@ -0,0 +1,11 @@
// RUN: %clang_cc1 -E -fminimize-whitespace %s 2>&1 | FileCheck %s -strict-whitespace
// CHECK: # 6 "{{.*}}line-directive-output-mincol.c"
// CHECK-NEXT: int x;
// CHECK-NEXT: int y;
int x;
int y;
// CHECK-NEXT: # 10 "{{.*}}line-directive-output-mincol.c"
// CHECK-NEXT: int z;
int z;

View File

@ -1,4 +1,5 @@
// RUN: %clang_cc1 -E %s 2>&1 | FileCheck %s -strict-whitespace
// RUN: %clang_cc1 -E -fminimize-whitespace %s 2>&1 | FileCheck %s -strict-whitespace
// PR6101
int a;
// CHECK: # 1 "{{.*}}line-directive-output.c"

View File

@ -1,4 +1,5 @@
// RUN: %clang_cc1 -E %s | FileCheck --strict-whitespace %s
// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck --strict-whitespace %s --check-prefix=MINWS
#define FOO1()
#define FOO2(x)x
@ -13,24 +14,32 @@
TEST(FOO1,)
// CHECK: FOO1 <> < > <> <> < > <> < > < >
// MINWS: FOO1<><><><><><><><>
TEST(FOO2,)
// CHECK: FOO2 <> < > <> <> < > <> < > < >
// MINWS-SAME: FOO2<><><><><><><><>
TEST(FOO3,)
// CHECK: FOO3 <> < > <> <> < > <> < > < >
// MINWS-SAME: FOO3<><><><><><><><>
TEST(FOO4,)
// CHECK: FOO4 < > < > < > < > < > < > < > < >
// MINWS-SAME: FOO4<><><><><><><><>
TEST(FOO5,)
// CHECK: FOO5 < > < > < > < > < > < > < > < >
// MINWS-SAME: FOO5<><><><><><><><>
TEST(FOO6,)
// CHECK: FOO6 <[]> < []> <[]> <[]> <[] > <[]> <[] > < []>
// MINWS-SAME: FOO6<[]><[]><[]><[]><[]><[]><[]><[]>
TEST(FOO7,)
// CHECK: FOO7 <[ ]> < [ ]> <[ ]> <[ ]> <[ ] > <[ ]> <[ ] > < [ ]>
// MINWS-SAME: FOO7<[]><[]><[]><[]><[]><[]><[]><[]>
TEST(FOO8,)
// CHECK: FOO8 <[ ]> < [ ]> <[ ]> <[ ]> <[ ] > <[ ]> <[ ] > < [ ]>
// MINWS-SAME: FOO8<[]><[]><[]><[]><[]><[]><[]><[]>

View File

@ -0,0 +1,8 @@
// RUN: not %clang -c -fminimize-whitespace %s 2>&1 | FileCheck %s --check-prefix=ON
// ON: error: invalid argument '-fminimize-whitespace' only allowed with '-E'
// RUN: not %clang -c -fno-minimize-whitespace %s 2>&1 | FileCheck %s --check-prefix=OFF
// OFF: error: invalid argument '-fno-minimize-whitespace' only allowed with '-E'
// RUN: not %clang -E -fminimize-whitespace -x assembler-with-cpp %s 2>&1 | FileCheck %s --check-prefix=ASM
// ASM: error: '-fminimize-whitespace' invalid for input of type assembler-with-cpp

View File

@ -0,0 +1,55 @@
// RUN: %clang_cc1 -fminimize-whitespace -E %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINCOL
// RUN: %clang_cc1 -fminimize-whitespace -E -C %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINCCOL
// RUN: %clang_cc1 -fminimize-whitespace -E -P %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINWS
// RUN: %clang_cc1 -fminimize-whitespace -E -C -P %s 2>&1 | FileCheck %s --strict-whitespace --check-prefix=MINCWS
#define NOT_OMP omp something
#define HASH #
int a; /* span-comment */
int b ; // line-comment
_Pragma ( "omp barrier" ) x // more line-comments
#pragma omp nothing // another comment
HASH pragma NOT_OMP
int e; // again a line
int \
f ;
// MINCOL: {{^}}# 9 "{{.*}}minimize-whitespace.c"{{$}}
// MINCOL: {{^}}int a;{{$}}
// MINCOL-NEXT: {{^}}int b;{{$}}
// MINCOL-NEXT: {{^}}#pragma omp barrier{{$}}
// MINCOL-NEXT: # 11 "{{.*}}minimize-whitespace.c"
// MINCOL-NEXT: {{^}}x{{$}}
// MINCOL-NEXT: {{^}}#pragma omp nothing{{$}}
// MINCOL-NEXT: {{^ }}#pragma omp something{{$}}
// MINCOL-NEXT: {{^}}int e;{{$}}
// MINCOL-NEXT: {{^}}int f;{{$}}
// FIXME: Comments after pragmas disappear, even without -fminimize-whitespace
// MINCCOL: {{^}}# 9 "{{.*}}minimize-whitespace.c"{{$}}
// MINCCOL: {{^}}int a;/* span-comment */{{$}}
// MINCCOL-NEXT: {{^}}int b;// line-comment{{$}}
// MINCCOL-NEXT: {{^}}#pragma omp barrier{{$}}
// MINCCOL-NEXT: # 11 "{{.*}}minimize-whitespace.c"
// MINCCOL-NEXT: {{^}}x// more line-comments{{$}}
// MINCCOL-NEXT: {{^}}#pragma omp nothing{{$}}
// MINCCOL-NEXT: {{^ }}#pragma omp something{{$}}
// MINCCOL-NEXT: {{^}}int e;// again a line{{$}}
// MINCCOL-NEXT: {{^}}int f;{{$}}
// MINWS: {{^}}int a;int b;{{$}}
// MINWS-NEXT: {{^}}#pragma omp barrier{{$}}
// MINWS-NEXT: {{^}}x{{$}}
// MINWS-NEXT: {{^}}#pragma omp nothing{{$}}
// MINWS-NEXT: {{^ }}#pragma omp something int e;int f;{{$}}
// FIXME: Comments after pragmas disappear, even without -fminimize-whitespace
// MINCWS: {{^}}int a;/* span-comment */int b;// line-comment{{$}}
// MINCWS-NEXT: {{^}}#pragma omp barrier{{$}}
// MINCWS-NEXT: {{^}}x// more line-comments{{$}}
// MINCWS-NEXT: {{^}}#pragma omp nothing{{$}}
// MINCWS-NEXT: {{^ }}#pragma omp something int e;// again a line{{$}}
// MINCWS-NEXT: {{^}}int f;

View File

@ -2,5 +2,8 @@
// CHECK: int x;
// CHECK-NEXT: int x;
// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck %s --check-prefix=MINWS --strict-whitespace
// MINWS: {{^}}int x;int x;{{$}}
#include "print_line_include.h"
#include "print_line_include.h"

View File

@ -1,16 +1,18 @@
// RUN: %clang_cc1 -E %s | FileCheck --strict-whitespace %s
// RUN: %clang_cc1 -E -P -fminimize-whitespace %s | FileCheck --strict-whitespace %s --check-prefix=MINWS
#define A(b) -#b , - #b , -# b , - # b
A()
// CHECK: {{^}}-"" , - "" , -"" , - ""{{$}}
// MINWS: {{^}}-"",-"",-"",-""
#define t(x) #x
t(a
c)
// CHECK: {{^}}"a c"{{$}}
// MINWS-SAME: "a c"
#define str(x) #x
#define f(x) str(-x)
@ -18,6 +20,7 @@ f(
1)
// CHECK: {{^}}"-1"
// MINWS-SAME: "-1"
#define paste(a,b) str(a<b##ld)
paste(hello1, wor)
@ -29,3 +32,4 @@ wor)
// CHECK: {{^}}"hello1<world"
// CHECK: {{^}}"hello2<world"
// CHECK: {{^}}"hello3<world"
// MINWS-SAME: {{^}}"hello1<world""hello2<world""hello3<world"{{$}}