forked from OSchip/llvm-project
[include-cleaner] Include-cleaner library structure, and simplistic AST walking.
Include-cleaner is a library that uses the clang AST and preprocessor to determine which headers are used. It will be used in clang-tidy, in clangd, in a standalone tool at least for testing, and in out-of-tree tools. Roughly, it walks the AST, finds referenced decls, maps these to used sourcelocations, then to FileEntrys, then matching these against #includes. However there are many wrinkles: dealing with macros, standard library symbols, umbrella headers, IWYU directives etc. It is not built on the C++20 modules concept of usage, to allow: - use with existing non-modules codebases - a flexible API embeddable in clang-tidy, clangd, and other tools - avoiding a chicken-and-egg problem where include cleanups are needed before modules can be adopted This library is based on existing functionality in clangd that provides an unused-include warning. However it has design changes: - it accommodates diagnosing missing includes too (this means tracking where references come from, not just the set of targets) - it more clearly separates the different mappings (symbol => location => header => include) for better testing - it handles special cases like standard library symbols and IWYU directives more elegantly by adding unified Location and Header types instead of side-tables - it will support some customization of policy where necessary (e.g. for style questions of what constitutes a use, or to allow both missing-include and unused-include modes to be conservative) This patch adds the basic directory structure under clang-tools-extra and a skeleton version of the AST traversal, which will be the central piece. A more end-to-end prototype is in https://reviews.llvm.org/D122677 RFC: https://discourse.llvm.org/t/rfc-lifting-include-cleaner-missing-unused-include-detection-out-of-clangd/61228 Differential Revision: https://reviews.llvm.org/D124164
This commit is contained in:
parent
14869bd2df
commit
41ac245c10
|
@ -14,6 +14,7 @@ add_subdirectory(clang-doc)
|
|||
add_subdirectory(clang-include-fixer)
|
||||
add_subdirectory(clang-move)
|
||||
add_subdirectory(clang-query)
|
||||
add_subdirectory(include-cleaner)
|
||||
add_subdirectory(pp-trace)
|
||||
add_subdirectory(pseudo)
|
||||
add_subdirectory(tool-template)
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
add_subdirectory(lib)
|
||||
if(CLANG_INCLUDE_TESTS)
|
||||
add_subdirectory(test)
|
||||
add_subdirectory(unittests)
|
||||
endif()
|
|
@ -0,0 +1,47 @@
|
|||
//===--- AnalysisInternal.h - Analysis building blocks ------------- C++-*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides smaller, testable pieces of the used-header analysis.
|
||||
// We find the headers by chaining together several mappings.
|
||||
//
|
||||
// AST => AST node => Symbol => Location => Header
|
||||
// /
|
||||
// Macro expansion =>
|
||||
//
|
||||
// The individual steps are declared here.
|
||||
// (AST => AST Node => Symbol is one API to avoid materializing DynTypedNodes).
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
|
||||
#define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
|
||||
|
||||
#include "clang/Basic/SourceLocation.h"
|
||||
#include "llvm/ADT/STLFunctionalExtras.h"
|
||||
|
||||
namespace clang {
|
||||
class Decl;
|
||||
class NamedDecl;
|
||||
namespace include_cleaner {
|
||||
|
||||
/// Traverses part of the AST from \p Root, finding uses of symbols.
|
||||
///
|
||||
/// Each use is reported to the callback:
|
||||
/// - the SourceLocation describes where the symbol was used. This is usually
|
||||
/// the primary location of the AST node found under Root.
|
||||
/// - the NamedDecl is the symbol referenced. It is canonical, rather than e.g.
|
||||
/// the redecl actually found by lookup.
|
||||
///
|
||||
/// walkAST is typically called once per top-level declaration in the file
|
||||
/// being analyzed, in order to find all references within it.
|
||||
void walkAST(Decl &Root, llvm::function_ref<void(SourceLocation, NamedDecl &)>);
|
||||
|
||||
} // namespace include_cleaner
|
||||
} // namespace clang
|
||||
|
||||
#endif
|
|
@ -0,0 +1,10 @@
|
|||
set(LLVM_LINK_COMPONENTS Support)
|
||||
|
||||
add_clang_library(clangIncludeCleaner
|
||||
WalkAST.cpp
|
||||
|
||||
LINK_LIBS
|
||||
clangBasic
|
||||
clangAST
|
||||
)
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
//===--- WalkAST.cpp - Find declaration references in the AST -------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AnalysisInternal.h"
|
||||
#include "clang/AST/RecursiveASTVisitor.h"
|
||||
|
||||
namespace clang {
|
||||
namespace include_cleaner {
|
||||
namespace {
|
||||
using DeclCallback = llvm::function_ref<void(SourceLocation, NamedDecl &)>;
|
||||
|
||||
class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
|
||||
DeclCallback Callback;
|
||||
|
||||
void report(SourceLocation Loc, NamedDecl *ND) {
|
||||
if (!ND || Loc.isInvalid())
|
||||
return;
|
||||
Callback(Loc, *cast<NamedDecl>(ND->getCanonicalDecl()));
|
||||
}
|
||||
|
||||
public:
|
||||
ASTWalker(DeclCallback Callback) : Callback(Callback) {}
|
||||
|
||||
bool VisitTagTypeLoc(TagTypeLoc TTL) {
|
||||
report(TTL.getNameLoc(), TTL.getDecl());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool VisitDeclRefExpr(DeclRefExpr *DRE) {
|
||||
report(DRE->getLocation(), DRE->getFoundDecl());
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void walkAST(Decl &Root, DeclCallback Callback) {
|
||||
ASTWalker(Callback).TraverseDecl(&Root);
|
||||
}
|
||||
|
||||
} // namespace include_cleaner
|
||||
} // namespace clang
|
|
@ -0,0 +1,25 @@
|
|||
set(CLANG_INCLUDE_CLEANER_TEST_DEPS
|
||||
ClangIncludeCleanerTests
|
||||
)
|
||||
|
||||
foreach (dep FileCheck not count)
|
||||
if(TARGET ${dep})
|
||||
list(APPEND CLANG_INCLUDE_CLEANER_TEST_DEPS ${dep})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
configure_lit_site_cfg(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
|
||||
MAIN_CONFIG
|
||||
${CMAKE_CURRENT_BINARY_DIR}/lit.cfg.py)
|
||||
|
||||
configure_lit_site_cfg(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py
|
||||
MAIN_CONFIG
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.cfg.py)
|
||||
|
||||
add_lit_testsuite(check-clang-include-cleaner "Running the clang-include-cleaner regression tests"
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
DEPENDS ${CLANG_INCLUDE_CLEANER_TEST_DEPS})
|
|
@ -0,0 +1,18 @@
|
|||
import lit.formats
|
||||
config.name = "clangIncludeCleaner Unit Tests"
|
||||
config.test_format = lit.formats.GoogleTest('.', 'Tests')
|
||||
config.test_source_root = config.clang_include_cleaner_binary_dir + "/unittests"
|
||||
config.test_exec_root = config.clang_include_cleaner_binary_dir + "/unittests"
|
||||
|
||||
# Point the dynamic loader at dynamic libraries in 'lib'.
|
||||
# FIXME: it seems every project has a copy of this logic. Move it somewhere.
|
||||
import platform
|
||||
if platform.system() == 'Darwin':
|
||||
shlibpath_var = 'DYLD_LIBRARY_PATH'
|
||||
elif platform.system() == 'Windows':
|
||||
shlibpath_var = 'PATH'
|
||||
else:
|
||||
shlibpath_var = 'LD_LIBRARY_PATH'
|
||||
config.environment[shlibpath_var] = os.path.pathsep.join((
|
||||
"@SHLIBDIR@", "@LLVM_LIBS_DIR@",
|
||||
config.environment.get(shlibpath_var,'')))
|
|
@ -0,0 +1,10 @@
|
|||
@LIT_SITE_CFG_IN_HEADER@
|
||||
# This is a shim to run the gtest unittests in ../unittests using lit.
|
||||
|
||||
config.llvm_libs_dir = path("@LLVM_LIBS_DIR@")
|
||||
config.shlibdir = path("@SHLIBDIR@")
|
||||
|
||||
config.clang_include_cleaner_binary_dir = path("@CMAKE_CURRENT_BINARY_DIR@/..")
|
||||
|
||||
# Delegate logic to lit.cfg.py.
|
||||
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/Unit/lit.cfg.py")
|
|
@ -0,0 +1,16 @@
|
|||
import lit.llvm
|
||||
|
||||
lit.llvm.initialize(lit_config, config)
|
||||
lit.llvm.llvm_config.use_default_substitutions()
|
||||
|
||||
config.name = 'ClangIncludeCleaner'
|
||||
config.suffixes = ['.test', '.c', '.cpp']
|
||||
config.excludes = ['Inputs']
|
||||
config.test_format = lit.formats.ShTest(not lit.llvm.llvm_config.use_lit_shell)
|
||||
config.test_source_root = config.clang_include_cleaner_source_dir + "/test"
|
||||
config.test_exec_root = config.clang_include_cleaner_binary_dir + "/test"
|
||||
|
||||
config.environment['PATH'] = os.path.pathsep.join((
|
||||
config.clang_tools_dir,
|
||||
config.llvm_tools_dir,
|
||||
config.environment['PATH']))
|
|
@ -0,0 +1,14 @@
|
|||
@LIT_SITE_CFG_IN_HEADER@
|
||||
|
||||
# Variables needed for common llvm config.
|
||||
config.clang_tools_dir = path("@CURRENT_TOOLS_DIR@")
|
||||
config.lit_tools_dir = path("@LLVM_LIT_TOOLS_DIR@")
|
||||
config.llvm_tools_dir = path(lit_config.substitute("@LLVM_TOOLS_DIR@"))
|
||||
config.llvm_libs_dir = path(lit_config.substitute("@LLVM_LIBS_DIR@"))
|
||||
config.target_triple = "@TARGET_TRIPLE@"
|
||||
config.python_executable = "@Python3_EXECUTABLE@"
|
||||
|
||||
config.clang_include_cleaner_source_dir = path("@CMAKE_CURRENT_SOURCE_DIR@/..")
|
||||
config.clang_include_cleaner_binary_dir = path("@CMAKE_CURRENT_BINARY_DIR@/..")
|
||||
# Delegate logic to lit.cfg.py.
|
||||
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg.py")
|
|
@ -0,0 +1,25 @@
|
|||
set(LLVM_LINK_COMPONENTS
|
||||
Support
|
||||
TestingSupport
|
||||
)
|
||||
|
||||
add_custom_target(ClangIncludeCleanerUnitTests)
|
||||
add_unittest(ClangIncludeCleanerUnitTests ClangIncludeCleanerTests
|
||||
WalkASTTest.cpp
|
||||
)
|
||||
|
||||
target_include_directories(ClangIncludeCleanerTests
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../lib)
|
||||
|
||||
clang_target_link_libraries(ClangIncludeCleanerTests
|
||||
PRIVATE
|
||||
clangBasic
|
||||
)
|
||||
|
||||
target_link_libraries(ClangIncludeCleanerTests
|
||||
PRIVATE
|
||||
clangIncludeCleaner
|
||||
clangTesting
|
||||
)
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
#include "AnalysisInternal.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/Basic/FileManager.h"
|
||||
#include "clang/Frontend/TextDiagnostic.h"
|
||||
#include "clang/Testing/TestAST.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Testing/Support/Annotations.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace clang {
|
||||
namespace include_cleaner {
|
||||
namespace {
|
||||
|
||||
// Specifies a test of which symbols are referenced by a piece of code.
|
||||
//
|
||||
// Example:
|
||||
// Target: int ^foo();
|
||||
// Referencing: int x = ^foo();
|
||||
// There must be exactly one referencing location marked.
|
||||
void testWalk(llvm::StringRef TargetCode, llvm::StringRef ReferencingCode) {
|
||||
llvm::Annotations Target(TargetCode);
|
||||
llvm::Annotations Referencing(ReferencingCode);
|
||||
|
||||
TestInputs Inputs(Referencing.code());
|
||||
Inputs.ExtraFiles["target.h"] = Target.code().str();
|
||||
Inputs.ExtraArgs.push_back("-include");
|
||||
Inputs.ExtraArgs.push_back("target.h");
|
||||
TestAST AST(Inputs);
|
||||
const auto &SM = AST.sourceManager();
|
||||
|
||||
// We're only going to record references from the nominated point,
|
||||
// to the target file.
|
||||
FileID ReferencingFile = SM.getMainFileID();
|
||||
SourceLocation ReferencingLoc =
|
||||
SM.getComposedLoc(ReferencingFile, Referencing.point());
|
||||
FileID TargetFile = SM.translateFile(
|
||||
llvm::cantFail(AST.fileManager().getFileRef("target.h")));
|
||||
|
||||
// Perform the walk, and capture the offsets of the referenced targets.
|
||||
std::vector<size_t> ReferencedOffsets;
|
||||
for (Decl *D : AST.context().getTranslationUnitDecl()->decls()) {
|
||||
if (ReferencingFile != SM.getDecomposedExpansionLoc(D->getLocation()).first)
|
||||
continue;
|
||||
walkAST(*D, [&](SourceLocation Loc, NamedDecl &ND) {
|
||||
if (SM.getFileLoc(Loc) != ReferencingLoc)
|
||||
return;
|
||||
auto NDLoc = SM.getDecomposedLoc(SM.getFileLoc(ND.getLocation()));
|
||||
if (NDLoc.first != TargetFile)
|
||||
return;
|
||||
ReferencedOffsets.push_back(NDLoc.second);
|
||||
});
|
||||
}
|
||||
llvm::sort(ReferencedOffsets);
|
||||
|
||||
// Compare results to the expected points.
|
||||
// For each difference, show the target point in context, like a diagnostic.
|
||||
std::string DiagBuf;
|
||||
llvm::raw_string_ostream DiagOS(DiagBuf);
|
||||
auto *DiagOpts = new DiagnosticOptions();
|
||||
DiagOpts->ShowLevel = 0;
|
||||
DiagOpts->ShowNoteIncludeStack = 0;
|
||||
TextDiagnostic Diag(DiagOS, AST.context().getLangOpts(), DiagOpts);
|
||||
auto DiagnosePoint = [&](const char *Message, unsigned Offset) {
|
||||
Diag.emitDiagnostic(
|
||||
FullSourceLoc(SM.getComposedLoc(TargetFile, Offset), SM),
|
||||
DiagnosticsEngine::Note, Message, {}, {});
|
||||
};
|
||||
for (auto Expected : Target.points())
|
||||
if (!llvm::is_contained(ReferencedOffsets, Expected))
|
||||
DiagnosePoint("location not marked used", Expected);
|
||||
for (auto Actual : ReferencedOffsets)
|
||||
if (!llvm::is_contained(Target.points(), Actual))
|
||||
DiagnosePoint("location unexpectedly used", Actual);
|
||||
|
||||
// If there were any differences, we print the entire referencing code once.
|
||||
if (!DiagBuf.empty())
|
||||
ADD_FAILURE() << DiagBuf << "\nfrom code:\n" << ReferencingCode;
|
||||
}
|
||||
|
||||
TEST(WalkAST, DeclRef) {
|
||||
testWalk("int ^x;", "int y = ^x;");
|
||||
testWalk("int ^foo();", "int y = ^foo();");
|
||||
testWalk("namespace ns { int ^x; }", "int y = ns::^x;");
|
||||
testWalk("struct S { static int ^x; };", "int y = S::^x;");
|
||||
// Canonical declaration only.
|
||||
testWalk("extern int ^x; int x;", "int y = ^x;");
|
||||
}
|
||||
|
||||
TEST(WalkAST, TagType) {
|
||||
testWalk("struct ^S {};", "^S *y;");
|
||||
testWalk("enum ^E {};", "^E *y;");
|
||||
testWalk("struct ^S { static int x; };", "int y = ^S::x;");
|
||||
}
|
||||
|
||||
TEST(WalkAST, Alias) {
|
||||
testWalk(R"cpp(
|
||||
namespace ns { int x; }
|
||||
using ns::^x;
|
||||
)cpp",
|
||||
"int y = ^x;");
|
||||
testWalk(R"cpp(
|
||||
namespace ns { struct S; } // Not used
|
||||
using ns::S; // FIXME: S should be used
|
||||
)cpp",
|
||||
"^S *x;");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace include_cleaner
|
||||
} // namespace clang
|
|
@ -45,6 +45,10 @@ struct TestInputs {
|
|||
/// Extra argv to pass to clang -cc1.
|
||||
std::vector<std::string> ExtraArgs = {};
|
||||
|
||||
/// Extra virtual files that are available to be #included.
|
||||
/// Keys are plain filenames ("foo.h"), values are file content.
|
||||
llvm::StringMap<std::string> ExtraFiles = {};
|
||||
|
||||
/// By default, error diagnostics during parsing are reported as gtest errors.
|
||||
/// To suppress this, set ErrorOK or include "error-ok" in a comment in Code.
|
||||
/// In either case, all diagnostics appear in TestAST::diagnostics().
|
||||
|
|
|
@ -105,6 +105,10 @@ TestAST::TestAST(const TestInputs &In) {
|
|||
auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
|
||||
VFS->addFile(Filename, /*ModificationTime=*/0,
|
||||
llvm::MemoryBuffer::getMemBufferCopy(In.Code, Filename));
|
||||
for (const auto &Extra : In.ExtraFiles)
|
||||
VFS->addFile(
|
||||
Extra.getKey(), /*ModificationTime=*/0,
|
||||
llvm::MemoryBuffer::getMemBufferCopy(Extra.getValue(), Extra.getKey()));
|
||||
Clang->createFileManager(VFS);
|
||||
|
||||
// Running the FrontendAction creates the other components: SourceManager,
|
||||
|
|
Loading…
Reference in New Issue