2016-05-20 19:24:24 +08:00
|
|
|
//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2016-05-20 19:24:24 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
///
|
|
|
|
/// \file
|
2018-05-09 09:00:01 +08:00
|
|
|
/// This file contains FormatTokenLexer, which tokenizes a source file
|
2016-05-20 19:24:24 +08:00
|
|
|
/// into a token stream suitable for ClangFormat.
|
|
|
|
///
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
|
|
|
|
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
|
|
|
|
|
|
|
|
#include "Encoding.h"
|
|
|
|
#include "FormatToken.h"
|
|
|
|
#include "clang/Basic/SourceLocation.h"
|
|
|
|
#include "clang/Basic/SourceManager.h"
|
|
|
|
#include "clang/Format/Format.h"
|
clang-format: better handle statement macros
Summary:
Some macros are used in the body of function, and actually contain the trailing semicolon: they should thus be automatically followed by a new line, and not get merged with the next line. This is for example the case with Qt's Q_UNUSED macro:
void foo(int a, int b) {
Q_UNUSED(a)
return b;
}
This patch deals with these cases by introducing a new option to specify list of statement macros. This re-uses the system already in place for foreach macros, to ensure there is no impact on performance.
Reviewers: krasimir, djasper, klimek
Reviewed By: krasimir
Subscribers: acoomans, mgrang, alexfh, klimek, cfe-commits
Differential Revision: https://reviews.llvm.org/D33440
llvm-svn: 343602
2018-10-03 00:37:51 +08:00
|
|
|
#include "llvm/ADT/MapVector.h"
|
2020-02-06 00:51:31 +08:00
|
|
|
#include "llvm/ADT/StringSet.h"
|
2019-03-01 17:09:54 +08:00
|
|
|
#include "llvm/Support/Regex.h"
|
2016-05-20 19:24:24 +08:00
|
|
|
|
2016-08-25 18:13:21 +08:00
|
|
|
#include <stack>
|
|
|
|
|
2016-05-20 19:24:24 +08:00
|
|
|
namespace clang {
|
|
|
|
namespace format {
|
|
|
|
|
2016-08-25 18:13:21 +08:00
|
|
|
// Lexing states kept on the lexer's state stack (FormatTokenLexer stores a
// std::stack<LexerState>).
// NOTE(review): the per-enumerator descriptions below are inferred from the
// names and from the template-string handling comment in FormatTokenLexer;
// confirm against the implementation file.
enum LexerState {
  // Presumably the default state outside of any special construct.
  NORMAL,
  // Presumably active while lexing inside a JavaScript template string.
  TEMPLATE_STRING,
  // Presumably indicates a token has been stashed for later retrieval (see
  // FormatTokenLexer::getStashedToken()).
  TOKEN_STASHED,
};
|
|
|
|
|
2016-05-20 19:24:24 +08:00
|
|
|
class FormatTokenLexer {
|
|
|
|
public:
|
2017-10-30 22:01:50 +08:00
|
|
|
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
|
2020-07-06 20:02:54 +08:00
|
|
|
const FormatStyle &Style, encoding::Encoding Encoding,
|
|
|
|
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
|
|
|
|
IdentifierTable &IdentTable);
|
2016-05-20 19:24:24 +08:00
|
|
|
|
|
|
|
ArrayRef<FormatToken *> lex();
|
|
|
|
|
|
|
|
const AdditionalKeywords &getKeywords() { return Keywords; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
void tryMergePreviousTokens();
|
|
|
|
|
|
|
|
bool tryMergeLessLess();
|
2017-04-11 17:55:00 +08:00
|
|
|
bool tryMergeNSStringLiteral();
|
2019-03-19 20:28:41 +08:00
|
|
|
bool tryMergeJSPrivateIdentifier();
|
2020-01-28 22:46:27 +08:00
|
|
|
bool tryMergeCSharpStringLiteral();
|
[clang-format] Add basic support for formatting C# files
Summary:
This revision adds basic support for formatting C# files with clang-format, I know the barrier to entry is high here so I'm sending this revision in to test the water as to whether this might be something we'd consider landing.
Tracking in Bugzilla as:
https://bugs.llvm.org/show_bug.cgi?id=40850
Justification:
C# code just looks ugly in comparison to the C++ code in our source tree which is clang-formatted.
I've struggled with Visual Studio reformatting to get a clean and consistent style, I want to format our C# code on saving like I do now for C++ and i want it to have the same style as defined in our .clang-format file, so it consistent as it can be with C++. (Braces/Breaking/Spaces/Indent etc..)
Using clang format without this patch leaves the code in a bad state, sometimes when the BreakStringLiterals is set, it fails to compile.
Mostly the C# is similar to Java, except instead of JavaAnnotations I try to reuse the TT_AttributeSquare.
Almost the most valuable portion is to have a new Language in order to partition the configuration for C# within a common .clang-format file, with the auto detection on the .cs extension. But there are other C# specific styles that could be added later if this is accepted. in particular how `{ set;get }` is formatted.
Reviewers: djasper, klimek, krasimir, benhamilton, JonasToth
Reviewed By: klimek
Subscribers: llvm-commits, mgorny, jdoerfert, cfe-commits
Tags: #clang, #clang-tools-extra
Differential Revision: https://reviews.llvm.org/D58404
llvm-svn: 356662
2019-03-21 21:09:22 +08:00
|
|
|
bool tryMergeCSharpKeywordVariables();
|
2021-05-06 17:22:31 +08:00
|
|
|
bool tryMergeNullishCoalescingEqual();
|
2019-10-04 16:10:22 +08:00
|
|
|
bool tryTransformCSharpForEach();
|
2020-05-20 14:44:36 +08:00
|
|
|
bool tryMergeForEach();
|
2020-06-04 03:42:09 +08:00
|
|
|
bool tryTransformTryUsageForC();
|
2016-05-20 19:24:24 +08:00
|
|
|
|
|
|
|
bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
|
|
|
|
|
|
|
|
// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
|
|
|
|
bool precedesOperand(FormatToken *Tok);
|
|
|
|
|
|
|
|
bool canPrecedeRegexLiteral(FormatToken *Prev);
|
|
|
|
|
|
|
|
// Tries to parse a JavaScript Regex literal starting at the current token,
|
|
|
|
// if that begins with a slash and is in a location where JavaScript allows
|
|
|
|
// regex literals. Changes the current token to a regex literal and updates
|
|
|
|
// its text if successful.
|
|
|
|
void tryParseJSRegexLiteral();
|
|
|
|
|
2016-08-25 18:13:21 +08:00
|
|
|
// Handles JavaScript template strings.
|
|
|
|
//
|
|
|
|
// JavaScript template strings use backticks ('`') as delimiters, and allow
|
|
|
|
// embedding expressions nested in ${expr-here}. Template strings can be
|
|
|
|
// nested recursively, i.e. expressions can contain template strings in turn.
|
|
|
|
//
|
|
|
|
// The code below parses starting from a backtick, up to a closing backtick or
|
|
|
|
// an opening ${. It also maintains a stack of lexing contexts to handle
|
|
|
|
// nested template parts by balancing curly braces.
|
|
|
|
void handleTemplateStrings();
|
2016-05-20 19:24:24 +08:00
|
|
|
|
2020-01-30 21:22:59 +08:00
|
|
|
void handleCSharpVerbatimAndInterpolatedStrings();
|
|
|
|
|
2017-11-10 20:50:09 +08:00
|
|
|
void tryParsePythonComment();
|
|
|
|
|
2016-05-20 19:24:24 +08:00
|
|
|
bool tryMerge_TMacro();
|
|
|
|
|
|
|
|
bool tryMergeConflictMarkers();
|
|
|
|
|
|
|
|
FormatToken *getStashedToken();
|
|
|
|
|
|
|
|
FormatToken *getNextToken();
|
|
|
|
|
|
|
|
FormatToken *FormatTok;
|
|
|
|
bool IsFirstToken;
|
2016-08-25 18:13:21 +08:00
|
|
|
std::stack<LexerState> StateStack;
|
2016-05-20 19:24:24 +08:00
|
|
|
unsigned Column;
|
|
|
|
unsigned TrailingWhitespace;
|
|
|
|
std::unique_ptr<Lexer> Lex;
|
|
|
|
const SourceManager &SourceMgr;
|
|
|
|
FileID ID;
|
|
|
|
const FormatStyle &Style;
|
2020-07-06 20:02:54 +08:00
|
|
|
IdentifierTable &IdentTable;
|
2016-05-20 19:24:24 +08:00
|
|
|
AdditionalKeywords Keywords;
|
|
|
|
encoding::Encoding Encoding;
|
2020-07-06 20:02:54 +08:00
|
|
|
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
|
2016-05-20 19:24:24 +08:00
|
|
|
// Index (in 'Tokens') of the last token that starts a new line.
|
|
|
|
unsigned FirstInLineIndex;
|
|
|
|
SmallVector<FormatToken *, 16> Tokens;
|
clang-format: better handle statement macros
Summary:
Some macros are used in the body of function, and actually contain the trailing semicolon: they should thus be automatically followed by a new line, and not get merged with the next line. This is for example the case with Qt's Q_UNUSED macro:
void foo(int a, int b) {
Q_UNUSED(a)
return b;
}
This patch deals with these cases by introducing a new option to specify list of statement macros. This re-uses the system already in place for foreach macros, to ensure there is no impact on performance.
Reviewers: krasimir, djasper, klimek
Reviewed By: krasimir
Subscribers: acoomans, mgrang, alexfh, klimek, cfe-commits
Differential Revision: https://reviews.llvm.org/D33440
llvm-svn: 343602
2018-10-03 00:37:51 +08:00
|
|
|
|
|
|
|
llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
|
2016-05-20 19:24:24 +08:00
|
|
|
|
|
|
|
bool FormattingDisabled;
|
|
|
|
|
|
|
|
llvm::Regex MacroBlockBeginRegex;
|
|
|
|
llvm::Regex MacroBlockEndRegex;
|
|
|
|
|
2020-02-06 00:51:31 +08:00
|
|
|
// Targets that may appear inside a C# attribute.
|
|
|
|
static const llvm::StringSet<> CSharpAttributeTargets;
|
|
|
|
|
2016-05-20 19:24:24 +08:00
|
|
|
void readRawToken(FormatToken &Tok);
|
|
|
|
|
|
|
|
void resetLexer(unsigned Offset);
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace format
|
|
|
|
} // namespace clang
|
|
|
|
|
|
|
|
#endif
|