llvm-project/clang/lib/Format/FormatTokenLexer.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

134 lines
4.0 KiB
C
Raw Normal View History

//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains FormatTokenLexer, which tokenizes a source file
/// into a token stream suitable for ClangFormat.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
#include "Encoding.h"
#include "FormatToken.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Regex.h"
#include <stack>
namespace clang {
namespace format {
// Lexing contexts tracked by FormatTokenLexer on its state stack
// (FormatTokenLexer::StateStack).
//
// NOTE(review): the per-enumerator descriptions below are inferred from the
// names and from the helpers declared in FormatTokenLexer; confirm against
// FormatTokenLexer.cpp.
enum LexerState {
  // Presumably the default context, with no special handling active.
  NORMAL = 0,
  // Presumably set while lexing inside a JavaScript template string (see
  // FormatTokenLexer::handleTemplateStrings).
  TEMPLATE_STRING = 1,
  // Presumably set while a token has been put aside to be returned later (see
  // FormatTokenLexer::getStashedToken).
  TOKEN_STASHED = 2,
};
class FormatTokenLexer {
public:
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
const FormatStyle &Style, encoding::Encoding Encoding,
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
IdentifierTable &IdentTable);
ArrayRef<FormatToken *> lex();
const AdditionalKeywords &getKeywords() { return Keywords; }
private:
void tryMergePreviousTokens();
bool tryMergeLessLess();
bool tryMergeNSStringLiteral();
bool tryMergeJSPrivateIdentifier();
bool tryMergeCSharpStringLiteral();
[clang-format] Add basic support for formatting C# files Summary: This revision adds basic support for formatting C# files with clang-format, I know the barrier to entry is high here so I'm sending this revision in to test the water as to whether this might be something we'd consider landing. Tracking in Bugzilla as: https://bugs.llvm.org/show_bug.cgi?id=40850 Justification: C# code just looks ugly in comparison to the C++ code in our source tree which is clang-formatted. I've struggled with Visual Studio reformatting to get a clean and consistent style, I want to format our C# code on saving like I do now for C++ and i want it to have the same style as defined in our .clang-format file, so it consistent as it can be with C++. (Braces/Breaking/Spaces/Indent etc..) Using clang format without this patch leaves the code in a bad state, sometimes when the BreakStringLiterals is set, it fails to compile. Mostly the C# is similar to Java, except instead of JavaAnnotations I try to reuse the TT_AttributeSquare. Almost the most valuable portion is to have a new Language in order to partition the configuration for C# within a common .clang-format file, with the auto detection on the .cs extension. But there are other C# specific styles that could be added later if this is accepted. in particular how `{ set;get }` is formatted. Reviewers: djasper, klimek, krasimir, benhamilton, JonasToth Reviewed By: klimek Subscribers: llvm-commits, mgorny, jdoerfert, cfe-commits Tags: #clang, #clang-tools-extra Differential Revision: https://reviews.llvm.org/D58404 llvm-svn: 356662
2019-03-21 21:09:22 +08:00
bool tryMergeCSharpKeywordVariables();
bool tryMergeNullishCoalescingEqual();
bool tryTransformCSharpForEach();
bool tryMergeForEach();
bool tryTransformTryUsageForC();
bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
bool precedesOperand(FormatToken *Tok);
bool canPrecedeRegexLiteral(FormatToken *Prev);
// Tries to parse a JavaScript Regex literal starting at the current token,
// if that begins with a slash and is in a location where JavaScript allows
// regex literals. Changes the current token to a regex literal and updates
// its text if successful.
void tryParseJSRegexLiteral();
// Handles JavaScript template strings.
//
// JavaScript template strings use backticks ('`') as delimiters, and allow
// embedding expressions nested in ${expr-here}. Template strings can be
// nested recursively, i.e. expressions can contain template strings in turn.
//
// The code below parses starting from a backtick, up to a closing backtick or
// an opening ${. It also maintains a stack of lexing contexts to handle
// nested template parts by balancing curly braces.
void handleTemplateStrings();
void handleCSharpVerbatimAndInterpolatedStrings();
void tryParsePythonComment();
bool tryMerge_TMacro();
bool tryMergeConflictMarkers();
FormatToken *getStashedToken();
FormatToken *getNextToken();
FormatToken *FormatTok;
bool IsFirstToken;
std::stack<LexerState> StateStack;
unsigned Column;
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;
const SourceManager &SourceMgr;
FileID ID;
const FormatStyle &Style;
IdentifierTable &IdentTable;
AdditionalKeywords Keywords;
encoding::Encoding Encoding;
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
// Index (in 'Tokens') of the last token that starts a new line.
unsigned FirstInLineIndex;
SmallVector<FormatToken *, 16> Tokens;
llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
bool FormattingDisabled;
llvm::Regex MacroBlockBeginRegex;
llvm::Regex MacroBlockEndRegex;
// Targets that may appear inside a C# attribute.
static const llvm::StringSet<> CSharpAttributeTargets;
void readRawToken(FormatToken &Tok);
void resetLexer(unsigned Offset);
};
} // namespace format
} // namespace clang
#endif