forked from OSchip/llvm-project
452 lines
16 KiB
C++
452 lines
16 KiB
C++
//===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// \file
|
|
/// This file implements a sort operation for JavaScript ES6 imports.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "SortJavaScriptImports.h"
|
|
#include "TokenAnalyzer.h"
|
|
#include "TokenAnnotator.h"
|
|
#include "clang/Basic/Diagnostic.h"
|
|
#include "clang/Basic/DiagnosticOptions.h"
|
|
#include "clang/Basic/LLVM.h"
|
|
#include "clang/Basic/SourceLocation.h"
|
|
#include "clang/Basic/SourceManager.h"
|
|
#include "clang/Format/Format.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include <algorithm>
|
|
#include <string>
|
|
|
|
#define DEBUG_TYPE "format-formatter"
|
|
|
|
namespace clang {
|
|
namespace format {
|
|
|
|
class FormatTokenLexer;
|
|
|
|
using clang::format::FormatStyle;
|
|
|
|
// An imported symbol in a JavaScript ES6 import/export, possibly aliased.
|
|
struct JsImportedSymbol {
|
|
StringRef Symbol;
|
|
StringRef Alias;
|
|
SourceRange Range;
|
|
|
|
bool operator==(const JsImportedSymbol &RHS) const {
|
|
// Ignore Range for comparison, it is only used to stitch code together,
|
|
// but imports at different code locations are still conceptually the same.
|
|
return Symbol == RHS.Symbol && Alias == RHS.Alias;
|
|
}
|
|
};
|
|
|
|
// An ES6 module reference.
|
|
//
|
|
// ES6 implements a module system, where individual modules (~= source files)
|
|
// can reference other modules, either importing symbols from them, or exporting
|
|
// symbols from them:
|
|
// import {foo} from 'foo';
|
|
// export {foo};
|
|
// export {bar} from 'bar';
|
|
//
|
|
// `export`s with URLs are syntactic sugar for an import of the symbol from the
|
|
// URL, followed by an export of the symbol, allowing this code to treat both
|
|
// statements more or less identically, with the exception being that `export`s
|
|
// are sorted last.
|
|
//
|
|
// imports and exports support individual symbols, but also a wildcard syntax:
|
|
// import * as prefix from 'foo';
|
|
// export * from 'bar';
|
|
//
|
|
// This struct represents both exports and imports to build up the information
|
|
// required for sorting module references.
|
|
struct JsModuleReference {
|
|
bool IsExport = false;
|
|
// Module references are sorted into these categories, in order.
|
|
enum ReferenceCategory {
|
|
SIDE_EFFECT, // "import 'something';"
|
|
ABSOLUTE, // from 'something'
|
|
RELATIVE_PARENT, // from '../*'
|
|
RELATIVE, // from './*'
|
|
};
|
|
ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
|
|
// The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
|
|
StringRef URL;
|
|
// Prefix from "import * as prefix". Empty for symbol imports and `export *`.
|
|
// Implies an empty names list.
|
|
StringRef Prefix;
|
|
// Symbols from `import {SymbolA, SymbolB, ...} from ...;`.
|
|
SmallVector<JsImportedSymbol, 1> Symbols;
|
|
// Textual position of the import/export, including preceding and trailing
|
|
// comments.
|
|
SourceRange Range;
|
|
};
|
|
|
|
bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
|
|
if (LHS.IsExport != RHS.IsExport)
|
|
return LHS.IsExport < RHS.IsExport;
|
|
if (LHS.Category != RHS.Category)
|
|
return LHS.Category < RHS.Category;
|
|
if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT)
|
|
// Side effect imports might be ordering sensitive. Consider them equal so
|
|
// that they maintain their relative order in the stable sort below.
|
|
// This retains transitivity because LHS.Category == RHS.Category here.
|
|
return false;
|
|
// Empty URLs sort *last* (for export {...};).
|
|
if (LHS.URL.empty() != RHS.URL.empty())
|
|
return LHS.URL.empty() < RHS.URL.empty();
|
|
if (int Res = LHS.URL.compare_lower(RHS.URL))
|
|
return Res < 0;
|
|
// '*' imports (with prefix) sort before {a, b, ...} imports.
|
|
if (LHS.Prefix.empty() != RHS.Prefix.empty())
|
|
return LHS.Prefix.empty() < RHS.Prefix.empty();
|
|
if (LHS.Prefix != RHS.Prefix)
|
|
return LHS.Prefix > RHS.Prefix;
|
|
return false;
|
|
}
|
|
|
|
// JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is
|
|
// implemented as a TokenAnalyzer because ES6 imports have substantial syntactic
|
|
// structure, making it messy to sort them using regular expressions.
|
|
class JavaScriptImportSorter : public TokenAnalyzer {
|
|
public:
|
|
JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
|
|
: TokenAnalyzer(Env, Style),
|
|
FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
|
|
|
|
std::pair<tooling::Replacements, unsigned>
|
|
analyze(TokenAnnotator &Annotator,
|
|
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
|
|
FormatTokenLexer &Tokens) override {
|
|
tooling::Replacements Result;
|
|
AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
|
|
|
|
const AdditionalKeywords &Keywords = Tokens.getKeywords();
|
|
SmallVector<JsModuleReference, 16> References;
|
|
AnnotatedLine *FirstNonImportLine;
|
|
std::tie(References, FirstNonImportLine) =
|
|
parseModuleReferences(Keywords, AnnotatedLines);
|
|
|
|
if (References.empty())
|
|
return {Result, 0};
|
|
|
|
SmallVector<unsigned, 16> Indices;
|
|
for (unsigned i = 0, e = References.size(); i != e; ++i)
|
|
Indices.push_back(i);
|
|
llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) {
|
|
return References[LHSI] < References[RHSI];
|
|
});
|
|
bool ReferencesInOrder = llvm::is_sorted(Indices);
|
|
|
|
std::string ReferencesText;
|
|
bool SymbolsInOrder = true;
|
|
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
|
|
JsModuleReference Reference = References[Indices[i]];
|
|
if (appendReference(ReferencesText, Reference))
|
|
SymbolsInOrder = false;
|
|
if (i + 1 < e) {
|
|
// Insert breaks between imports and exports.
|
|
ReferencesText += "\n";
|
|
// Separate imports groups with two line breaks, but keep all exports
|
|
// in a single group.
|
|
if (!Reference.IsExport &&
|
|
(Reference.IsExport != References[Indices[i + 1]].IsExport ||
|
|
Reference.Category != References[Indices[i + 1]].Category))
|
|
ReferencesText += "\n";
|
|
}
|
|
}
|
|
|
|
if (ReferencesInOrder && SymbolsInOrder)
|
|
return {Result, 0};
|
|
|
|
SourceRange InsertionPoint = References[0].Range;
|
|
InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
|
|
|
|
// The loop above might collapse previously existing line breaks between
|
|
// import blocks, and thus shrink the file. SortIncludes must not shrink
|
|
// overall source length as there is currently no re-calculation of ranges
|
|
// after applying source sorting.
|
|
// This loop just backfills trailing spaces after the imports, which are
|
|
// harmless and will be stripped by the subsequent formatting pass.
|
|
// FIXME: A better long term fix is to re-calculate Ranges after sorting.
|
|
unsigned PreviousSize = getSourceText(InsertionPoint).size();
|
|
while (ReferencesText.size() < PreviousSize) {
|
|
ReferencesText += " ";
|
|
}
|
|
|
|
// Separate references from the main code body of the file.
|
|
if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2)
|
|
ReferencesText += "\n";
|
|
|
|
LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n"
|
|
<< getSourceText(InsertionPoint) << "\nwith:\n"
|
|
<< ReferencesText << "\n");
|
|
auto Err = Result.add(tooling::Replacement(
|
|
Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
|
|
ReferencesText));
|
|
// FIXME: better error handling. For now, just print error message and skip
|
|
// the replacement for the release version.
|
|
if (Err) {
|
|
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
|
|
assert(false);
|
|
}
|
|
|
|
return {Result, 0};
|
|
}
|
|
|
|
private:
|
|
FormatToken *Current;
|
|
FormatToken *LineEnd;
|
|
|
|
FormatToken invalidToken;
|
|
|
|
StringRef FileContents;
|
|
|
|
void skipComments() { Current = skipComments(Current); }
|
|
|
|
FormatToken *skipComments(FormatToken *Tok) {
|
|
while (Tok && Tok->is(tok::comment))
|
|
Tok = Tok->Next;
|
|
return Tok;
|
|
}
|
|
|
|
void nextToken() {
|
|
Current = Current->Next;
|
|
skipComments();
|
|
if (!Current || Current == LineEnd->Next) {
|
|
// Set the current token to an invalid token, so that further parsing on
|
|
// this line fails.
|
|
invalidToken.Tok.setKind(tok::unknown);
|
|
Current = &invalidToken;
|
|
}
|
|
}
|
|
|
|
StringRef getSourceText(SourceRange Range) {
|
|
return getSourceText(Range.getBegin(), Range.getEnd());
|
|
}
|
|
|
|
StringRef getSourceText(SourceLocation Begin, SourceLocation End) {
|
|
const SourceManager &SM = Env.getSourceManager();
|
|
return FileContents.substr(SM.getFileOffset(Begin),
|
|
SM.getFileOffset(End) - SM.getFileOffset(Begin));
|
|
}
|
|
|
|
// Appends ``Reference`` to ``Buffer``, returning true if text within the
|
|
// ``Reference`` changed (e.g. symbol order).
|
|
bool appendReference(std::string &Buffer, JsModuleReference &Reference) {
|
|
// Sort the individual symbols within the import.
|
|
// E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';`
|
|
SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols;
|
|
llvm::stable_sort(
|
|
Symbols, [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) {
|
|
return LHS.Symbol.compare_lower(RHS.Symbol) < 0;
|
|
});
|
|
if (Symbols == Reference.Symbols) {
|
|
// No change in symbol order.
|
|
StringRef ReferenceStmt = getSourceText(Reference.Range);
|
|
Buffer += ReferenceStmt;
|
|
return false;
|
|
}
|
|
// Stitch together the module reference start...
|
|
SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin();
|
|
SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd();
|
|
Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart);
|
|
// ... then the references in order ...
|
|
for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) {
|
|
if (I != Symbols.begin())
|
|
Buffer += ",";
|
|
Buffer += getSourceText(I->Range);
|
|
}
|
|
// ... followed by the module reference end.
|
|
Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd());
|
|
return true;
|
|
}
|
|
|
|
// Parses module references in the given lines. Returns the module references,
|
|
// and a pointer to the first "main code" line if that is adjacent to the
|
|
// affected lines of module references, nullptr otherwise.
|
|
std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *>
|
|
parseModuleReferences(const AdditionalKeywords &Keywords,
|
|
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
|
|
SmallVector<JsModuleReference, 16> References;
|
|
SourceLocation Start;
|
|
AnnotatedLine *FirstNonImportLine = nullptr;
|
|
bool AnyImportAffected = false;
|
|
for (auto Line : AnnotatedLines) {
|
|
Current = Line->First;
|
|
LineEnd = Line->Last;
|
|
skipComments();
|
|
if (Start.isInvalid() || References.empty())
|
|
// After the first file level comment, consider line comments to be part
|
|
// of the import that immediately follows them by using the previously
|
|
// set Start.
|
|
Start = Line->First->Tok.getLocation();
|
|
if (!Current) {
|
|
// Only comments on this line. Could be the first non-import line.
|
|
FirstNonImportLine = Line;
|
|
continue;
|
|
}
|
|
JsModuleReference Reference;
|
|
Reference.Range.setBegin(Start);
|
|
if (!parseModuleReference(Keywords, Reference)) {
|
|
if (!FirstNonImportLine)
|
|
FirstNonImportLine = Line; // if no comment before.
|
|
break;
|
|
}
|
|
FirstNonImportLine = nullptr;
|
|
AnyImportAffected = AnyImportAffected || Line->Affected;
|
|
Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
|
|
LLVM_DEBUG({
|
|
llvm::dbgs() << "JsModuleReference: {"
|
|
<< "is_export: " << Reference.IsExport
|
|
<< ", cat: " << Reference.Category
|
|
<< ", url: " << Reference.URL
|
|
<< ", prefix: " << Reference.Prefix;
|
|
for (size_t i = 0; i < Reference.Symbols.size(); ++i)
|
|
llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as "
|
|
<< Reference.Symbols[i].Alias;
|
|
llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
|
|
llvm::dbgs() << "}\n";
|
|
});
|
|
References.push_back(Reference);
|
|
Start = SourceLocation();
|
|
}
|
|
// Sort imports if any import line was affected.
|
|
if (!AnyImportAffected)
|
|
References.clear();
|
|
return std::make_pair(References, FirstNonImportLine);
|
|
}
|
|
|
|
// Parses a JavaScript/ECMAScript 6 module reference.
|
|
// See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules
|
|
// for grammar EBNF (production ModuleItem).
|
|
bool parseModuleReference(const AdditionalKeywords &Keywords,
|
|
JsModuleReference &Reference) {
|
|
if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
|
|
return false;
|
|
Reference.IsExport = Current->is(tok::kw_export);
|
|
|
|
nextToken();
|
|
if (Current->isStringLiteral() && !Reference.IsExport) {
|
|
// "import 'side-effect';"
|
|
Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT;
|
|
Reference.URL =
|
|
Current->TokenText.substr(1, Current->TokenText.size() - 2);
|
|
return true;
|
|
}
|
|
|
|
if (!parseModuleBindings(Keywords, Reference))
|
|
return false;
|
|
|
|
if (Current->is(Keywords.kw_from)) {
|
|
// imports have a 'from' clause, exports might not.
|
|
nextToken();
|
|
if (!Current->isStringLiteral())
|
|
return false;
|
|
// URL = TokenText without the quotes.
|
|
Reference.URL =
|
|
Current->TokenText.substr(1, Current->TokenText.size() - 2);
|
|
if (Reference.URL.startswith(".."))
|
|
Reference.Category =
|
|
JsModuleReference::ReferenceCategory::RELATIVE_PARENT;
|
|
else if (Reference.URL.startswith("."))
|
|
Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
|
|
else
|
|
Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
|
|
} else {
|
|
// w/o URL groups with "empty".
|
|
Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parseModuleBindings(const AdditionalKeywords &Keywords,
|
|
JsModuleReference &Reference) {
|
|
if (parseStarBinding(Keywords, Reference))
|
|
return true;
|
|
return parseNamedBindings(Keywords, Reference);
|
|
}
|
|
|
|
bool parseStarBinding(const AdditionalKeywords &Keywords,
|
|
JsModuleReference &Reference) {
|
|
// * as prefix from '...';
|
|
if (Current->isNot(tok::star))
|
|
return false;
|
|
nextToken();
|
|
if (Current->isNot(Keywords.kw_as))
|
|
return false;
|
|
nextToken();
|
|
if (Current->isNot(tok::identifier))
|
|
return false;
|
|
Reference.Prefix = Current->TokenText;
|
|
nextToken();
|
|
return true;
|
|
}
|
|
|
|
bool parseNamedBindings(const AdditionalKeywords &Keywords,
|
|
JsModuleReference &Reference) {
|
|
if (Current->is(tok::identifier)) {
|
|
nextToken();
|
|
if (Current->is(Keywords.kw_from))
|
|
return true;
|
|
if (Current->isNot(tok::comma))
|
|
return false;
|
|
nextToken(); // eat comma.
|
|
}
|
|
if (Current->isNot(tok::l_brace))
|
|
return false;
|
|
|
|
// {sym as alias, sym2 as ...} from '...';
|
|
while (Current->isNot(tok::r_brace)) {
|
|
nextToken();
|
|
if (Current->is(tok::r_brace))
|
|
break;
|
|
if (!Current->isOneOf(tok::identifier, tok::kw_default))
|
|
return false;
|
|
|
|
JsImportedSymbol Symbol;
|
|
Symbol.Symbol = Current->TokenText;
|
|
// Make sure to include any preceding comments.
|
|
Symbol.Range.setBegin(
|
|
Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin());
|
|
nextToken();
|
|
|
|
if (Current->is(Keywords.kw_as)) {
|
|
nextToken();
|
|
if (!Current->isOneOf(tok::identifier, tok::kw_default))
|
|
return false;
|
|
Symbol.Alias = Current->TokenText;
|
|
nextToken();
|
|
}
|
|
Symbol.Range.setEnd(Current->Tok.getLocation());
|
|
Reference.Symbols.push_back(Symbol);
|
|
|
|
if (!Current->isOneOf(tok::r_brace, tok::comma))
|
|
return false;
|
|
}
|
|
nextToken(); // consume r_brace
|
|
return true;
|
|
}
|
|
};
|
|
|
|
tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
|
|
StringRef Code,
|
|
ArrayRef<tooling::Range> Ranges,
|
|
StringRef FileName) {
|
|
// FIXME: Cursor support.
|
|
return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style)
|
|
.process()
|
|
.first;
|
|
}
|
|
|
|
} // end namespace format
|
|
} // end namespace clang
|