Revert "[clang-tidy] Confusable identifiers detection"

This reverts commit b94db7ed7e.
See comments on https://reviews.llvm.org/D112916:
- breaks `check-clangd`, and makes clang-tidy crash on simple inputs
- likely does the wrong thing in cross builds

Also revert follow-up "[gn build] (manually) port b94db7ed7e (Confusables.inc)"
This reverts commit 180bae08a0.
This commit is contained in:
Nico Weber 2022-06-03 09:29:10 -04:00
parent 88052fd241
commit 371e6f8b7f
13 changed files with 0 additions and 9932 deletions

View File

@ -3,18 +3,8 @@ set(LLVM_LINK_COMPONENTS
Support
)
add_subdirectory(ConfusableTable)
# Generate Confusables.inc in the build directory by running the
# make_confusable_table tool on ConfusableTable/confusables.txt. The
# genconfusable target exists so other targets can depend on the generated file.
# NOTE(review): the generator is invoked by its target name; the revert message
# says this likely does the wrong thing in cross builds -- confirm whether a
# host-built tool is required here.
add_custom_command(
OUTPUT Confusables.inc
COMMAND make_confusable_table ${CMAKE_CURRENT_SOURCE_DIR}/ConfusableTable/confusables.txt ${CMAKE_CURRENT_BINARY_DIR}/Confusables.inc
DEPENDS make_confusable_table ConfusableTable/confusables.txt)
add_custom_target(genconfusable DEPENDS Confusables.inc)
add_clang_library(clangTidyMiscModule
DefinitionsInHeadersCheck.cpp
Homoglyph.cpp
MiscTidyModule.cpp
MisleadingBidirectional.cpp
MisleadingIdentifier.cpp
@ -38,7 +28,6 @@ add_clang_library(clangTidyMiscModule
DEPENDS
omp_gen
genconfusable
)
clang_target_link_libraries(clangTidyMiscModule

View File

@ -1,3 +0,0 @@
# Build-time generator: a single-source executable built from
# build_confusable_table.cpp.
add_llvm_executable(make_confusable_table
build_confusable_table.cpp
)

View File

@ -1,72 +0,0 @@
//===--- build_confusable_table.cpp - clang-tidy---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
int main(int argc, char *argv[]) {
auto ErrorOrBuffer = MemoryBuffer::getFile(argv[1], true);
if (!ErrorOrBuffer)
return 1;
std::unique_ptr<MemoryBuffer> Buffer = std::move(ErrorOrBuffer.get());
StringRef Content = Buffer->getBuffer();
Content = Content.drop_until([](char c) { return c == '#'; });
SmallVector<StringRef> Lines;
SplitString(Content, Lines, "\r\n");
std::vector<std::pair<llvm::UTF32, SmallVector<llvm::UTF32>>> Entries;
for (StringRef Line : Lines) {
if (Line.startswith("#"))
continue;
SmallVector<StringRef> Values;
Line.split(Values, ';');
if (Values.size() < 2) {
errs() << "Failed to parse: " << Line << "\n";
return 2;
}
llvm::StringRef From = Values[0].trim();
llvm::UTF32 CodePoint;
From.getAsInteger(16, CodePoint);
SmallVector<llvm::UTF32> To;
SmallVector<StringRef> ToN;
Values[1].split(ToN, ' ', -1, false);
for (StringRef To_ : ToN) {
llvm::UTF32 ToCodePoint;
To_.trim().getAsInteger(16, ToCodePoint);
To.push_back(ToCodePoint);
}
while (To.size() < 32)
To.push_back(0);
Entries.emplace_back(CodePoint, To);
}
std::sort(Entries.begin(), Entries.end());
errs() << "Parsed " << Entries.size() << " Entries\n";
std::error_code ec;
llvm::raw_fd_ostream os(argv[2], ec);
os << "struct {llvm::UTF32 codepoint; llvm::UTF32 values[32];} "
"ConfusableEntries[] = {\n";
for (auto const &Values : Entries) {
os << " { ";
os << Values.first;
os << ", {";
for (auto CP : Values.second) {
os << CP << ", ";
}
os << "}},\n";
}
os << "};\n";
return 0;
}

View File

@ -1,108 +0,0 @@
//===--- Homoglyph.cpp - clang-tidy ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Homoglyph.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Support/ConvertUTF.h"
namespace {
// Preprocessed version of
// https://www.unicode.org/Public/security/latest/confusables.txt
//
// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
#include "Confusables.inc"
} // namespace
namespace clang {
namespace tidy {
namespace misc {
/// Forwards the check name and context to the ClangTidyCheck base class; no
/// other state is set up here.
Homoglyph::Homoglyph(StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context) {}
/// Destructor, defaulted out of line.
Homoglyph::~Homoglyph() = default;
/**
 * Build a skeleton out of the Original identifier, following the algorithm
 * described in http://www.unicode.org/reports/tr39/#def-skeleton
 *
 * Each UTF-8 encoded code point of Name is looked up in the sorted
 * ConfusableEntries table. Code points with an entry are replaced by the
 * UTF-8 encoding of the entry's (zero-terminated) replacement sequence; all
 * other code points are copied through unchanged. No normalization step is
 * performed here.
 *
 * If Name is not valid UTF-8, or a replacement cannot be re-encoded, a
 * message is printed to llvm::errs() and the skeleton built so far is
 * returned.
 */
std::string Homoglyph::skeleton(StringRef Name) {
  // A single code point needs at most four bytes in UTF-8.
  constexpr size_t MaxUTF8BytesPerCodePoint = 4;

  std::string Skeleton;
  Skeleton.reserve(1 + Name.size());

  // The decoder takes an explicit end pointer, so the StringRef bytes can be
  // walked directly without copying them into a std::string first.
  const llvm::UTF8 *Curr = Name.bytes_begin();
  const llvm::UTF8 *const End = Name.bytes_end();

  while (Curr < End) {
    const llvm::UTF8 *const Prev = Curr;
    llvm::UTF32 CodePoint;
    const llvm::ConversionResult Result = llvm::convertUTF8Sequence(
        &Curr, End, &CodePoint, llvm::strictConversion);
    if (Result != llvm::conversionOK) {
      llvm::errs() << "Unicode conversion issue\n";
      break;
    }

    // Binary search: the table is sorted by source code point.
    auto Where = std::lower_bound(
        std::begin(ConfusableEntries), std::end(ConfusableEntries), CodePoint,
        [](decltype(ConfusableEntries[0]) x, llvm::UTF32 y) {
          return x.codepoint < y;
        });

    if (Where == std::end(ConfusableEntries) ||
        CodePoint != Where->codepoint) {
      // Not confusable with anything: keep the original bytes.
      Skeleton.append(reinterpret_cast<const char *>(Prev),
                      reinterpret_cast<const char *>(Curr));
      continue;
    }

    // Size the buffer for the worst case (every slot used, four bytes each).
    // A buffer of only one byte per slot made longer or non-ASCII
    // replacements fail to convert and truncated the skeleton.
    constexpr size_t MaxValues =
        sizeof(Where->values) / sizeof(Where->values[0]);
    llvm::UTF8 Buffer[MaxValues * MaxUTF8BytesPerCodePoint];
    llvm::UTF8 *BufferStart = std::begin(Buffer);
    llvm::UTF8 *IBuffer = BufferStart;

    // The replacement sequence is padded with zeros; stop at the first one.
    const llvm::UTF32 *ValuesStart = std::begin(Where->values);
    const llvm::UTF32 *ValuesEnd = std::find(
        std::begin(Where->values), std::end(Where->values), llvm::UTF32{0});

    if (llvm::ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer,
                                 std::end(Buffer), llvm::strictConversion) !=
        llvm::conversionOK) {
      llvm::errs() << "Unicode conversion issue\n";
      break;
    }
    Skeleton.append(reinterpret_cast<const char *>(BufferStart),
                    reinterpret_cast<const char *>(IBuffer));
  }
  return Skeleton;
}
/// Match callback: reports declarations whose names differ but share the same
/// skeleton as a previously seen declaration in a related lexical context.
///
/// Every processed declaration is appended to the list stored in Mapper under
/// its skeleton so that later declarations can be compared against it.
void Homoglyph::check(const ast_matchers::MatchFinder::MatchResult &Result) {
  const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
  if (!ND)
    return;

  // NamedDecl::getName() requires the declaration name to be a simple
  // identifier, which is not the case for e.g. constructors or overloaded
  // operators. Go through the IdentifierInfo and skip declarations that have
  // none; unnamed declarations could never produce a diagnostic anyway since
  // they all share the same (empty) name.
  const auto *II = ND->getIdentifier();
  if (!II)
    return;

  StringRef NDName = II->getName();
  auto &Mapped = Mapper[skeleton(NDName)];
  auto *NDDecl = ND->getDeclContext();
  for (auto *OND : Mapped) {
    // Only compare declarations when one lives directly in the other's
    // declaration context.
    if (!NDDecl->isDeclInLexicalTraversal(OND) &&
        !OND->getDeclContext()->isDeclInLexicalTraversal(ND))
      continue;
    // Same skeleton but different spelling: the two names are confusable.
    if (OND->getName() != NDName) {
      diag(OND->getLocation(), "%0 is confusable with %1")
          << OND->getName() << NDName;
      diag(ND->getLocation(), "other definition found here",
           DiagnosticIDs::Note);
    }
  }
  Mapped.push_back(ND);
}
/// Registers a single matcher that binds every named declaration to the id
/// "nameddecl", with this check as the callback.
void Homoglyph::registerMatchers(ast_matchers::MatchFinder *Finder) {
  const auto AnyNamedDecl = ast_matchers::namedDecl().bind("nameddecl");
  Finder->addMatcher(AnyNamedDecl, this);
}
} // namespace misc
} // namespace tidy
} // namespace clang

View File

@ -1,35 +0,0 @@
//===--- Homoglyph.h - clang-tidy -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_HOMOGLYPH_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_HOMOGLYPH_H
#include "../ClangTidyCheck.h"
namespace clang {
namespace tidy {
namespace misc {
/// clang-tidy check (registered as "misc-homoglyph") that diagnoses named
/// declarations whose identifiers differ but reduce to the same skeleton,
/// i.e. identifiers that are confusable with each other.
class Homoglyph : public ClangTidyCheck {
public:
Homoglyph(StringRef Name, ClangTidyContext *Context);
~Homoglyph();
/// Registers the matcher that feeds named declarations to check().
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
/// Compares the matched declaration against previously seen declarations
/// with the same skeleton and emits diagnostics for confusable names.
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
private:
/// Returns the skeleton of the given identifier: confusable code points are
/// replaced using the generated confusables table.
std::string skeleton(StringRef);
/// Maps a skeleton to the declarations seen so far that produce it.
llvm::StringMap<llvm::SmallVector<NamedDecl const *>> Mapper;
};
} // namespace misc
} // namespace tidy
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_HOMOGLYPH_H

View File

@ -10,7 +10,6 @@
#include "../ClangTidyModule.h"
#include "../ClangTidyModuleRegistry.h"
#include "DefinitionsInHeadersCheck.h"
#include "Homoglyph.h"
#include "MisleadingBidirectional.h"
#include "MisleadingIdentifier.h"
#include "MisplacedConstCheck.h"
@ -38,7 +37,6 @@ public:
"misc-definitions-in-headers");
CheckFactories.registerCheck<MisleadingBidirectionalCheck>(
"misc-misleading-bidirectional");
CheckFactories.registerCheck<Homoglyph>("misc-homoglyph");
CheckFactories.registerCheck<MisleadingIdentifierCheck>(
"misc-misleading-identifier");
CheckFactories.registerCheck<MisplacedConstCheck>("misc-misplaced-const");

View File

@ -136,10 +136,6 @@ New checks
Future libc++ will remove the extension (`D120996
<https://reviews.llvm.org/D120996>`).
- New :doc:`misc-homoglyph <clang-tidy/checks/misc-homoglyph>` check.
Detects confusable unicode identifiers.
New check aliases
^^^^^^^^^^^^^^^^^

View File

@ -214,7 +214,6 @@ Clang-Tidy Checks
`llvmlibc-implementation-in-namespace <llvmlibc-implementation-in-namespace.html>`_,
`llvmlibc-restrict-system-libc-headers <llvmlibc-restrict-system-libc-headers.html>`_, "Yes"
`misc-definitions-in-headers <misc-definitions-in-headers.html>`_, "Yes"
`misc-homoglyph <misc-homoglyph.html>`_,
`misc-misleading-bidirectional <misc-misleading-bidirectional.html>`_,
`misc-misleading-identifier <misc-misleading-identifier.html>`_,
`misc-misplaced-const <misc-misplaced-const.html>`_,

View File

@ -1,15 +0,0 @@
.. title:: clang-tidy - misc-homoglyph
misc-homoglyph
==============
Warn about confusable identifiers, i.e. identifiers that are visually close to
each other, but use different Unicode characters. This detects potential attacks
as described in `Trojan Source <https://www.trojansource.codes>`_.
Example:
.. code-block:: c++
int fo;
int 𝐟o;

View File

@ -1,19 +0,0 @@
// RUN: %check_clang_tidy %s misc-homoglyph %t
int fo;
// CHECK-MESSAGES: :[[#@LINE-1]]:5: warning: fo is confusable with 𝐟o [misc-homoglyph]
int 𝐟o;
// CHECK-MESSAGES: :[[#@LINE-1]]:5: note: other definition found here
void no() {
int 𝐟oo;
}
void worry() {
int foo;
}
int 𝐟i;
// CHECK-MESSAGES: :[[#@LINE-1]]:5: warning: 𝐟i is confusable with fi [misc-homoglyph]
int fi;
// CHECK-MESSAGES: :[[#@LINE-1]]:5: note: other definition found here

View File

@ -1,26 +1,7 @@
# Generates Confusables.inc from ConfusableTable/confusables.txt by running the
# make_confusable_table executable built for the host toolchain. The executable
# is launched through the run_tablegen.py wrapper script, which receives the
# tool path followed by the input and output paths.
action("Confusables.inc") {
gen_target = "ConfusableTable:make_confusable_table($host_toolchain)"
gen_executable = get_label_info(gen_target, "root_out_dir") +
"/bin/" + get_label_info(gen_target, "name")
deps = [ gen_target ]
# FIXME: Rename this script, now that it's used for other things.
script = "//llvm/utils/gn/build/run_tablegen.py"
sources = [ "ConfusableTable/confusables.txt" ]
outputs = [ "$target_gen_dir/$target_name" ]
args = [
rebase_path(gen_executable, root_build_dir),
rebase_path(sources[0], root_build_dir),
rebase_path(outputs[0], root_build_dir),
]
}
static_library("misc") {
output_name = "clangTidyMiscModule"
configs += [ "//llvm/utils/gn/build:clang_code" ]
include_dirs = [ target_gen_dir ]
deps = [
":Confusables.inc",
"//clang-tools-extra/clang-tidy",
"//clang-tools-extra/clang-tidy/utils",
"//clang/lib/AST",
@ -34,7 +15,6 @@ static_library("misc") {
]
sources = [
"DefinitionsInHeadersCheck.cpp",
"Homoglyph.cpp",
"MiscTidyModule.cpp",
"MisleadingBidirectional.cpp",
"MisleadingIdentifier.cpp",

View File

@ -1,4 +0,0 @@
# Generator executable built from build_confusable_table.cpp; it links against
# LLVM Support only.
executable("make_confusable_table") {
deps = [ "//llvm/lib/Support" ]
sources = [ "build_confusable_table.cpp" ]
}