forked from OSchip/llvm-project
Revert "[clang-tidy] Confusable identifiers detection"
This reverts commit b94db7ed7e.
See comments on https://reviews.llvm.org/D112916:
- breaks `check-clangd`, and makes clang-tidy crash on simple inputs
- likely does the wrong thing in cross builds
Also revert follow-up "[gn build] (manually) port b94db7ed7e (Confusables.inc)".
This reverts commit 180bae08a0.
This commit is contained in:
parent
88052fd241
commit
371e6f8b7f
|
@ -3,18 +3,8 @@ set(LLVM_LINK_COMPONENTS
|
|||
Support
|
||||
)
|
||||
|
||||
# Pull in the ConfusableTable subdirectory; the custom command below invokes a
# tool named make_confusable_table (presumably defined there - confirm).
add_subdirectory(ConfusableTable)
|
||||
|
||||
# Generate Confusables.inc in the current binary directory by running
# make_confusable_table on ConfusableTable/confusables.txt. The rule re-runs
# when either the tool or the data file changes.
# NOTE(review): the tool is used as a regular target of this build; the commit
# message flags this as likely wrong for cross builds - confirm before relanding.
add_custom_command(
|
||||
OUTPUT Confusables.inc
|
||||
COMMAND make_confusable_table ${CMAKE_CURRENT_SOURCE_DIR}/ConfusableTable/confusables.txt ${CMAKE_CURRENT_BINARY_DIR}/Confusables.inc
|
||||
DEPENDS make_confusable_table ConfusableTable/confusables.txt)
|
||||
|
||||
# Named target wrapping the generated file so that other targets can list it
# under DEPENDS.
add_custom_target(genconfusable DEPENDS Confusables.inc)
|
||||
|
||||
add_clang_library(clangTidyMiscModule
|
||||
DefinitionsInHeadersCheck.cpp
|
||||
Homoglyph.cpp
|
||||
MiscTidyModule.cpp
|
||||
MisleadingBidirectional.cpp
|
||||
MisleadingIdentifier.cpp
|
||||
|
@ -38,7 +28,6 @@ add_clang_library(clangTidyMiscModule
|
|||
|
||||
DEPENDS
|
||||
omp_gen
|
||||
genconfusable
|
||||
)
|
||||
|
||||
clang_target_link_libraries(clangTidyMiscModule
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
# Generator executable built from build_confusable_table.cpp; it takes an input
# confusables file and an output path as its two command-line arguments.
add_llvm_executable(make_confusable_table
|
||||
build_confusable_table.cpp
|
||||
)
|
|
@ -1,72 +0,0 @@
|
|||
//===--- build_confusable_table.cpp - clang-tidy---------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/Support/ConvertUTF.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
auto ErrorOrBuffer = MemoryBuffer::getFile(argv[1], true);
|
||||
if (!ErrorOrBuffer)
|
||||
return 1;
|
||||
std::unique_ptr<MemoryBuffer> Buffer = std::move(ErrorOrBuffer.get());
|
||||
StringRef Content = Buffer->getBuffer();
|
||||
Content = Content.drop_until([](char c) { return c == '#'; });
|
||||
SmallVector<StringRef> Lines;
|
||||
SplitString(Content, Lines, "\r\n");
|
||||
|
||||
std::vector<std::pair<llvm::UTF32, SmallVector<llvm::UTF32>>> Entries;
|
||||
for (StringRef Line : Lines) {
|
||||
if (Line.startswith("#"))
|
||||
continue;
|
||||
|
||||
SmallVector<StringRef> Values;
|
||||
Line.split(Values, ';');
|
||||
if (Values.size() < 2) {
|
||||
errs() << "Failed to parse: " << Line << "\n";
|
||||
return 2;
|
||||
}
|
||||
|
||||
llvm::StringRef From = Values[0].trim();
|
||||
llvm::UTF32 CodePoint;
|
||||
From.getAsInteger(16, CodePoint);
|
||||
|
||||
SmallVector<llvm::UTF32> To;
|
||||
SmallVector<StringRef> ToN;
|
||||
Values[1].split(ToN, ' ', -1, false);
|
||||
for (StringRef To_ : ToN) {
|
||||
llvm::UTF32 ToCodePoint;
|
||||
To_.trim().getAsInteger(16, ToCodePoint);
|
||||
To.push_back(ToCodePoint);
|
||||
}
|
||||
while (To.size() < 32)
|
||||
To.push_back(0);
|
||||
|
||||
Entries.emplace_back(CodePoint, To);
|
||||
}
|
||||
std::sort(Entries.begin(), Entries.end());
|
||||
errs() << "Parsed " << Entries.size() << " Entries\n";
|
||||
|
||||
std::error_code ec;
|
||||
llvm::raw_fd_ostream os(argv[2], ec);
|
||||
os << "struct {llvm::UTF32 codepoint; llvm::UTF32 values[32];} "
|
||||
"ConfusableEntries[] = {\n";
|
||||
for (auto const &Values : Entries) {
|
||||
os << " { ";
|
||||
os << Values.first;
|
||||
os << ", {";
|
||||
for (auto CP : Values.second) {
|
||||
os << CP << ", ";
|
||||
}
|
||||
os << "}},\n";
|
||||
}
|
||||
os << "};\n";
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,108 +0,0 @@
|
|||
//===--- Homoglyph.cpp - clang-tidy ---------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Homoglyph.h"
|
||||
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
#include "llvm/Support/ConvertUTF.h"
|
||||
|
||||
namespace {
|
||||
// Preprocessed version of
|
||||
// https://www.unicode.org/Public/security/latest/confusables.txt
|
||||
//
|
||||
// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
|
||||
#include "Confusables.inc"
|
||||
} // namespace
|
||||
|
||||
namespace clang {
|
||||
namespace tidy {
|
||||
namespace misc {
|
||||
|
||||
// Forwards the check name and the context to the ClangTidyCheck base class;
// no other state is set up here.
Homoglyph::Homoglyph(StringRef Name, ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context) {}
|
||||
|
||||
// Destructor is declared in the header and defaulted here, out of line.
Homoglyph::~Homoglyph() = default;
|
||||
|
||||
/**
|
||||
* Build a skeleton out of the Original identifier, following the algorithm
|
||||
* described in http://www.unicode.org/reports/tr39/#def-skeleton
|
||||
*/
|
||||
/// Builds the skeleton of \p Name.
///
/// \p Name is decoded as UTF-8 one code point at a time. A code point that has
/// an entry in ConfusableEntries is replaced by the UTF-8 encoding of that
/// entry's zero-terminated value sequence; any other code point is copied
/// through unchanged.
///
/// \param Name identifier text, expected to be valid UTF-8.
/// \returns the skeleton string. If a conversion fails, a message is printed
/// to stderr and the skeleton built so far is returned.
std::string Homoglyph::skeleton(StringRef Name) {
  std::string Skeleton;
  Skeleton.reserve(1 + Name.size());

  const auto *Curr = reinterpret_cast<const llvm::UTF8 *>(Name.begin());
  const auto *const End = reinterpret_cast<const llvm::UTF8 *>(Name.end());
  while (Curr < End) {
    const llvm::UTF8 *const Prev = Curr;
    llvm::UTF32 CodePoint;
    const llvm::ConversionResult Result = llvm::convertUTF8Sequence(
        &Curr, End, &CodePoint, llvm::strictConversion);
    if (Result != llvm::conversionOK) {
      llvm::errs() << "Unicode conversion issue\n";
      break;
    }

    // ConfusableEntries is sorted by code point, so binary search applies.
    const auto *Where = std::lower_bound(
        std::begin(ConfusableEntries), std::end(ConfusableEntries), CodePoint,
        [](const auto &Entry, llvm::UTF32 Value) {
          return Entry.codepoint < Value;
        });
    if (Where == std::end(ConfusableEntries) ||
        CodePoint != Where->codepoint) {
      // No mapping: keep the original bytes of this code point.
      Skeleton.append(reinterpret_cast<const char *>(Prev),
                      reinterpret_cast<const char *>(Curr));
      continue;
    }

    // Every replacement code point may need several UTF-8 bytes, so size the
    // buffer for the worst case instead of one byte per code point.
    constexpr size_t MaxValues =
        sizeof(ConfusableEntries[0].values) /
        sizeof(ConfusableEntries[0].values[0]);
    llvm::UTF8 Buffer[MaxValues * UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
    llvm::UTF8 *IBuffer = std::begin(Buffer);

    // The value sequence is padded with zeros; stop at the first one.
    const llvm::UTF32 *ValuesStart = std::begin(Where->values);
    const llvm::UTF32 *ValuesEnd = std::find(
        std::begin(Where->values), std::end(Where->values), llvm::UTF32(0));
    if (llvm::ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer,
                                 std::end(Buffer), llvm::strictConversion) !=
        llvm::conversionOK) {
      llvm::errs() << "Unicode conversion issue\n";
      break;
    }
    Skeleton.append(reinterpret_cast<const char *>(std::begin(Buffer)),
                    reinterpret_cast<const char *>(IBuffer));
  }
  return Skeleton;
}
|
||||
|
||||
/// Handles one matched declaration bound to "nameddecl".
///
/// The declaration is grouped with earlier declarations that share the same
/// skeleton. For each earlier declaration with a different spelling whose
/// declaration context lexically contains the new one (or vice versa), a
/// warning is emitted at the earlier declaration and a note at the new one.
/// The new declaration is then recorded in its skeleton group.
void Homoglyph::check(const ast_matchers::MatchFinder::MatchResult &Result) {
  const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
  if (!ND)
    return;

  // NamedDecl::getName() asserts unless the name is a simple identifier, and
  // the matcher matches every named declaration (constructors, operators,
  // ...). Go through the identifier and skip declarations without one.
  const auto *II = ND->getIdentifier();
  if (!II)
    return;
  StringRef NDName = II->getName();

  auto &Mapped = Mapper[skeleton(NDName)];
  const auto *NDDecl = ND->getDeclContext();
  for (const auto *OND : Mapped) {
    // Only declarations where one context lexically contains the other
    // declaration are compared.
    if (!NDDecl->isDeclInLexicalTraversal(OND) &&
        !OND->getDeclContext()->isDeclInLexicalTraversal(ND))
      continue;
    // Identical spellings are not confusable.
    if (OND->getName() == NDName)
      continue;
    diag(OND->getLocation(), "%0 is confusable with %1")
        << OND->getName() << NDName;
    diag(ND->getLocation(), "other definition found here",
         DiagnosticIDs::Note);
  }
  Mapped.push_back(ND);
}
|
||||
|
||||
/// Registers this check for every named declaration, bound as "nameddecl".
void Homoglyph::registerMatchers(ast_matchers::MatchFinder *Finder) {
  const auto AnyNamedDecl = ast_matchers::namedDecl().bind("nameddecl");
  Finder->addMatcher(AnyNamedDecl, this);
}
|
||||
|
||||
} // namespace misc
|
||||
} // namespace tidy
|
||||
} // namespace clang
|
|
@ -1,35 +0,0 @@
|
|||
//===--- Homoglyph.h - clang-tidy -------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_HOMOGLYPH_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_HOMOGLYPH_H
|
||||
|
||||
#include "../ClangTidyCheck.h"
|
||||
|
||||
namespace clang {
|
||||
namespace tidy {
|
||||
namespace misc {
|
||||
|
||||
class Homoglyph : public ClangTidyCheck {
|
||||
public:
|
||||
Homoglyph(StringRef Name, ClangTidyContext *Context);
|
||||
~Homoglyph();
|
||||
|
||||
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
|
||||
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
|
||||
|
||||
private:
|
||||
std::string skeleton(StringRef);
|
||||
llvm::StringMap<llvm::SmallVector<NamedDecl const *>> Mapper;
|
||||
};
|
||||
|
||||
} // namespace misc
|
||||
} // namespace tidy
|
||||
} // namespace clang
|
||||
|
||||
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_HOMOGLYPH_H
|
|
@ -10,7 +10,6 @@
|
|||
#include "../ClangTidyModule.h"
|
||||
#include "../ClangTidyModuleRegistry.h"
|
||||
#include "DefinitionsInHeadersCheck.h"
|
||||
#include "Homoglyph.h"
|
||||
#include "MisleadingBidirectional.h"
|
||||
#include "MisleadingIdentifier.h"
|
||||
#include "MisplacedConstCheck.h"
|
||||
|
@ -38,7 +37,6 @@ public:
|
|||
"misc-definitions-in-headers");
|
||||
CheckFactories.registerCheck<MisleadingBidirectionalCheck>(
|
||||
"misc-misleading-bidirectional");
|
||||
CheckFactories.registerCheck<Homoglyph>("misc-homoglyph");
|
||||
CheckFactories.registerCheck<MisleadingIdentifierCheck>(
|
||||
"misc-misleading-identifier");
|
||||
CheckFactories.registerCheck<MisplacedConstCheck>("misc-misplaced-const");
|
||||
|
|
|
@ -136,10 +136,6 @@ New checks
|
|||
Future libc++ will remove the extension (`D120996
|
||||
<https://reviews.llvm.org/D120996>`).
|
||||
|
||||
- New :doc:`misc-homoglyph <clang-tidy/checks/misc-homoglyph>` check.
|
||||
|
||||
Detects confusable unicode identifiers.
|
||||
|
||||
New check aliases
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
|
|
@ -214,7 +214,6 @@ Clang-Tidy Checks
|
|||
`llvmlibc-implementation-in-namespace <llvmlibc-implementation-in-namespace.html>`_,
|
||||
`llvmlibc-restrict-system-libc-headers <llvmlibc-restrict-system-libc-headers.html>`_, "Yes"
|
||||
`misc-definitions-in-headers <misc-definitions-in-headers.html>`_, "Yes"
|
||||
`misc-homoglyph <misc-homoglyph.html>`_,
|
||||
`misc-misleading-bidirectional <misc-misleading-bidirectional.html>`_,
|
||||
`misc-misleading-identifier <misc-misleading-identifier.html>`_,
|
||||
`misc-misplaced-const <misc-misplaced-const.html>`_,
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
.. title:: clang-tidy - misc-homoglyph
|
||||
|
||||
misc-homoglyph
|
||||
==============
|
||||
|
||||
Warn about confusable identifiers, i.e. identifiers that are visually close to
|
||||
each other, but use different Unicode characters. This detects potential attacks
|
||||
as described in `Trojan Source <https://www.trojansource.codes>`_.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
int fo;
|
||||
int 𝐟o;
|
|
@ -1,19 +0,0 @@
|
|||
// RUN: %check_clang_tidy %s misc-homoglyph %t
|
||||
|
||||
int fo;
|
||||
// CHECK-MESSAGES: :[[#@LINE-1]]:5: warning: fo is confusable with 𝐟o [misc-homoglyph]
|
||||
int 𝐟o;
|
||||
// CHECK-MESSAGES: :[[#@LINE-1]]:5: note: other definition found here
|
||||
|
||||
void no() {
|
||||
int 𝐟oo;
|
||||
}
|
||||
|
||||
void worry() {
|
||||
int foo;
|
||||
}
|
||||
|
||||
int 𝐟i;
|
||||
// CHECK-MESSAGES: :[[#@LINE-1]]:5: warning: 𝐟i is confusable with fi [misc-homoglyph]
|
||||
int fi;
|
||||
// CHECK-MESSAGES: :[[#@LINE-1]]:5: note: other definition found here
|
|
@ -1,26 +1,7 @@
|
|||
# Generates Confusables.inc by running the make_confusable_table executable
# (built for the host toolchain) on ConfusableTable/confusables.txt.
action("Confusables.inc") {
|
||||
gen_target = "ConfusableTable:make_confusable_table($host_toolchain)"
|
||||
# Path of the generator binary inside the output directory of its toolchain.
gen_executable = get_label_info(gen_target, "root_out_dir") +
|
||||
"/bin/" + get_label_info(gen_target, "name")
|
||||
deps = [ gen_target ]
|
||||
|
||||
# FIXME: Rename this script, now that it's used for other things.
|
||||
script = "//llvm/utils/gn/build/run_tablegen.py"
|
||||
sources = [ "ConfusableTable/confusables.txt" ]
|
||||
outputs = [ "$target_gen_dir/$target_name" ]
|
||||
# Arguments handed to the script: generator binary, input file, output file,
# each rebased relative to the root build directory.
args = [
|
||||
rebase_path(gen_executable, root_build_dir),
|
||||
rebase_path(sources[0], root_build_dir),
|
||||
rebase_path(outputs[0], root_build_dir),
|
||||
]
|
||||
}
|
||||
|
||||
static_library("misc") {
|
||||
output_name = "clangTidyMiscModule"
|
||||
configs += [ "//llvm/utils/gn/build:clang_code" ]
|
||||
include_dirs = [ target_gen_dir ]
|
||||
deps = [
|
||||
":Confusables.inc",
|
||||
"//clang-tools-extra/clang-tidy",
|
||||
"//clang-tools-extra/clang-tidy/utils",
|
||||
"//clang/lib/AST",
|
||||
|
@ -34,7 +15,6 @@ static_library("misc") {
|
|||
]
|
||||
sources = [
|
||||
"DefinitionsInHeadersCheck.cpp",
|
||||
"Homoglyph.cpp",
|
||||
"MiscTidyModule.cpp",
|
||||
"MisleadingBidirectional.cpp",
|
||||
"MisleadingIdentifier.cpp",
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
# Generator executable built from build_confusable_table.cpp; depends on the
# LLVM Support library.
executable("make_confusable_table") {
|
||||
deps = [ "//llvm/lib/Support" ]
|
||||
sources = [ "build_confusable_table.cpp" ]
|
||||
}
|
Loading…
Reference in New Issue