clang-format: [JS] detect C++ keywords.

Summary:
C++ defines a number of keywords that are regular identifiers in
JavaScript, e.g. `concept`:

    const concept = 1; // legit JS

This change expands the existing `IsJavaScriptIdentifier(Tok)` function
to return true for C++ keywords that aren't keywords in JS.

Reviewers: krasimir

Subscribers: jfb, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D77311
This commit is contained in:
Martin Probst 2020-04-02 16:53:17 +02:00
parent 0718e3ae31
commit 146d685cd6
3 changed files with 118 additions and 17 deletions

View File

@ -910,9 +910,64 @@ struct AdditionalKeywords {
/// Returns \c true if \p Tok is a true JavaScript identifier, returns
/// \c false if it is a keyword or a pseudo keyword.
///
/// Tokens that clang lexes as keywords but that are not JavaScript keywords
/// (for example \c concept, \c int or \c operator) are regular identifiers in
/// JavaScript and therefore yield \c true. Every non-keyword, non-identifier
/// token (punctuation etc.) yields \c false.
bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
  // Based on the list of JavaScript & TypeScript keywords here:
  // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74
  //
  // Note: there must be no early `Tok.is(tok::identifier)` return ahead of
  // this switch; keyword tokens are not tok::identifier, so such a check
  // would reject every C++ keyword and make the code below unreachable.
  switch (Tok.Tok.getKind()) {
  case tok::kw_break:
  case tok::kw_case:
  case tok::kw_catch:
  case tok::kw_class:
  case tok::kw_continue:
  case tok::kw_const:
  case tok::kw_default:
  case tok::kw_delete:
  case tok::kw_do:
  case tok::kw_else:
  case tok::kw_enum:
  case tok::kw_export:
  case tok::kw_false:
  case tok::kw_for:
  case tok::kw_if:
  case tok::kw_import:
  case tok::kw_module:
  case tok::kw_new:
  case tok::kw_private:
  case tok::kw_protected:
  case tok::kw_public:
  case tok::kw_return:
  case tok::kw_static:
  case tok::kw_switch:
  case tok::kw_this:
  case tok::kw_throw:
  case tok::kw_true:
  case tok::kw_try:
  case tok::kw_typeof:
  case tok::kw_void:
  case tok::kw_while:
    // These are JS keywords that are lexed by LLVM/clang as keywords.
    return false;
  case tok::identifier:
    // For identifiers, make sure they are true identifiers, excluding the
    // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords).
    return JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
           JsExtraKeywords.end();
  default:
    // Other keywords are handled in the switch below, to avoid problems due
    // to duplicate case labels when using the #include trick.
    break;
  }

  switch (Tok.Tok.getKind()) {
  // Handle C++ keywords not included above: these are all JS identifiers.
  // The KEYWORD macro expands every entry of TokenKinds.def into a case
  // label, so all remaining keyword kinds fall through to `return true`.
#define KEYWORD(X, Y) case tok::kw_##X:
#include "clang/Basic/TokenKinds.def"
    // #undef KEYWORD is not needed -- it's #undef-ed at the end of
    // TokenKinds.def
    return true;
  default:
    // All other tokens (punctuation etc) are not JS identifiers.
    return false;
  }
}
/// Returns \c true if \p Tok is a C# keyword, returns

View File

@ -1522,9 +1522,9 @@ private:
if (Style.Language == FormatStyle::LK_JavaScript) {
if (Current.is(tok::exclaim)) {
if (Current.Previous &&
(Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
tok::r_paren, tok::r_square,
tok::r_brace) ||
(Keywords.IsJavaScriptIdentifier(*Current.Previous) ||
Current.Previous->isOneOf(tok::kw_namespace, tok::r_paren,
tok::r_square, tok::r_brace) ||
Current.Previous->Tok.isLiteral())) {
Current.Type = TT_JsNonNullAssertion;
return;
@ -3070,10 +3070,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
return false;
// In tagged template literals ("html`bar baz`"), there is no space between
// the tag identifier and the template string. getIdentifierInfo makes sure
// that the identifier is not a pseudo keyword like `yield`, either.
if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
Right.is(TT_TemplateString))
// the tag identifier and the template string.
if (Keywords.IsJavaScriptIdentifier(Left) && Right.is(TT_TemplateString))
return false;
if (Right.is(tok::star) &&
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))

View File

@ -386,13 +386,6 @@ TEST_F(FormatTestJS, ReservedWordsParenthesized) {
"return (x);\n");
}
// Checks that words which are keywords in C++ (or Qt) but not in JavaScript
// do not disturb the formatting of JavaScript input.
// NOTE(review): verifyFormat is a fixture helper defined outside this view;
// presumably it asserts that the snippet is left unchanged by formatting.
TEST_F(FormatTestJS, CppKeywords) {
// `operator` is a C++ keyword; here it is used as a plain JS name.
verifyFormat("return operator && (aa);");
// `slots` is a Qt keyword; here it is a plain JS name with a type annotation.
verifyFormat("slots: Slot[];");
}
TEST_F(FormatTestJS, ES6DestructuringAssignment) {
verifyFormat("var [a, b, c] = [1, 2, 3];");
verifyFormat("const [a, b, c] = [1, 2, 3];");
@ -2366,6 +2359,61 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) {
verifyFormat("return !!x;\n");
}
// Checks that words which are keywords in C/C++ (or Qt) but not in
// JavaScript/TypeScript are handled as ordinary identifiers when formatting
// JavaScript input.
// NOTE(review): verifyFormat is a fixture helper defined outside this view;
// presumably it asserts that the snippet is left unchanged by formatting.
TEST_F(FormatTestJS, CppKeywords) {
// Make sure we don't mess stuff up because of C++ keywords.
verifyFormat("return operator && (aa);");
// .. or Qt ones.
verifyFormat("const slots: Slot[];");
// Use the "!" assertion operator to validate that clang-format understands
// these C++ keywords aren't keywords in JS/TS: each case below expects the
// "!" to stay attached to the preceding word, with no space inserted.
verifyFormat("auto!;");
verifyFormat("char!;");
verifyFormat("concept!;");
verifyFormat("double!;");
verifyFormat("extern!;");
verifyFormat("float!;");
verifyFormat("inline!;");
verifyFormat("int!;");
verifyFormat("long!;");
verifyFormat("register!;");
verifyFormat("restrict!;");
verifyFormat("sizeof!;");
verifyFormat("struct!;");
verifyFormat("typedef!;");
verifyFormat("union!;");
verifyFormat("unsigned!;");
verifyFormat("volatile!;");
// Spellings with a leading underscore.
verifyFormat("_Alignas!;");
verifyFormat("_Alignof!;");
verifyFormat("_Atomic!;");
verifyFormat("_Bool!;");
verifyFormat("_Complex!;");
verifyFormat("_Generic!;");
verifyFormat("_Imaginary!;");
verifyFormat("_Noreturn!;");
verifyFormat("_Static_assert!;");
verifyFormat("_Thread_local!;");
verifyFormat("__func__!;");
verifyFormat("__objc_yes!;");
verifyFormat("__objc_no!;");
// Remaining lowercase keyword spellings.
verifyFormat("asm!;");
verifyFormat("bool!;");
verifyFormat("const_cast!;");
verifyFormat("dynamic_cast!;");
verifyFormat("explicit!;");
verifyFormat("friend!;");
verifyFormat("mutable!;");
verifyFormat("operator!;");
verifyFormat("reinterpret_cast!;");
verifyFormat("static_cast!;");
verifyFormat("template!;");
verifyFormat("typename!;");
verifyFormat("typeid!;");
verifyFormat("using!;");
verifyFormat("virtual!;");
verifyFormat("wchar_t!;");
}
TEST_F(FormatTestJS, NullPropagatingOperator) {
verifyFormat("let x = foo?.bar?.baz();\n");
verifyFormat("let x = foo?.(foo);\n");