Misleading bidirectional detection

This patch implements detection of incomplete bidirectional sequences within
comments and string literals in clang-tidy.

It detects the bidi part of https://www.trojansource.codes/trojan-source.pdf

Differential Revision: https://reviews.llvm.org/D112913
This commit is contained in:
serge-sans-paille 2021-09-28 11:23:22 +02:00
parent e3275cfa94
commit 35cca45b09
8 changed files with 208 additions and 1 deletions

View File

@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
add_clang_library(clangTidyMiscModule add_clang_library(clangTidyMiscModule
DefinitionsInHeadersCheck.cpp DefinitionsInHeadersCheck.cpp
MiscTidyModule.cpp MiscTidyModule.cpp
MisleadingBidirectional.cpp
MisleadingIdentifier.cpp MisleadingIdentifier.cpp
MisplacedConstCheck.cpp MisplacedConstCheck.cpp
NewDeleteOverloadsCheck.cpp NewDeleteOverloadsCheck.cpp

View File

@ -10,6 +10,7 @@
#include "../ClangTidyModule.h" #include "../ClangTidyModule.h"
#include "../ClangTidyModuleRegistry.h" #include "../ClangTidyModuleRegistry.h"
#include "DefinitionsInHeadersCheck.h" #include "DefinitionsInHeadersCheck.h"
#include "MisleadingBidirectional.h"
#include "MisleadingIdentifier.h" #include "MisleadingIdentifier.h"
#include "MisplacedConstCheck.h" #include "MisplacedConstCheck.h"
#include "NewDeleteOverloadsCheck.h" #include "NewDeleteOverloadsCheck.h"
@ -34,6 +35,8 @@ public:
void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
CheckFactories.registerCheck<DefinitionsInHeadersCheck>( CheckFactories.registerCheck<DefinitionsInHeadersCheck>(
"misc-definitions-in-headers"); "misc-definitions-in-headers");
CheckFactories.registerCheck<MisleadingBidirectionalCheck>(
"misc-misleading-bidirectional");
CheckFactories.registerCheck<MisleadingIdentifierCheck>( CheckFactories.registerCheck<MisleadingIdentifierCheck>(
"misc-misleading-identifier"); "misc-misleading-identifier");
CheckFactories.registerCheck<MisplacedConstCheck>("misc-misplaced-const"); CheckFactories.registerCheck<MisplacedConstCheck>("misc-misplaced-const");

View File

@ -0,0 +1,139 @@
//===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MisleadingBidirectional.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Support/ConvertUTF.h"
using namespace clang;
using namespace clang::tidy::misc;
/// Scans \p Buffer (expected to be UTF-8) and reports whether a bidirectional
/// embedding, override or isolate is still open when the end of the buffer is
/// reached.
///
/// A stack of "expected closers" is maintained while scanning:
///  - RLO/RLE/LRO/LRE push a context that is closed by PDF;
///  - RLI/LRI/FSI push a context that is closed by PDI;
///  - paragraph and segment separators drop every open context;
///  - surplus PDF / PDI characters are ignored.
///
/// \param Buffer          the bytes to inspect.
/// \param HonorLineBreaks NOTE(review): accepted for the callers' benefit but
///                        not consulted by this function; separators always
///                        reset the context stack.
/// \returns true if at least one context is still open at the end of
///          \p Buffer, false otherwise.
static bool containsMisleadingBidi(StringRef Buffer,
                                   bool HonorLineBreaks = true) {
  const char *CurPtr = Buffer.begin();

  enum BidiChar {
    NEL = 0x85,
    PS = 0x2029,
    RLO = 0x202E,
    RLE = 0x202B,
    LRO = 0x202D,
    LRE = 0x202A,
    PDF = 0x202C,
    RLI = 0x2067,
    LRI = 0x2066,
    FSI = 0x2068,
    PDI = 0x2069
  };

  // Each entry is the closing character (PDF or PDI) the context is waiting
  // for; the innermost context is at the back.
  SmallVector<BidiChar> BidiContexts;

  while (CurPtr < Buffer.end()) {
    unsigned char C = *CurPtr;
    if (isASCII(C)) {
      ++CurPtr;
      // Only the ASCII-range separators can be recognised here. U+0085 (NEL)
      // is not ASCII: it is encoded on two bytes in UTF-8 and is therefore
      // handled below, once the code point has been decoded.
      bool IsParagrapSep =
          (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E));
      bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
      if (IsParagrapSep || IsSegmentSep)
        BidiContexts.clear();
      continue;
    }

    llvm::UTF32 CodePoint;
    llvm::ConversionResult Result = llvm::convertUTF8Sequence(
        (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
        &CodePoint, llvm::strictConversion);

    // If conversion fails, utf-8 is designed so that we can just try next char.
    if (Result != llvm::conversionOK) {
      ++CurPtr;
      continue;
    }

    // Open a PDF context.
    if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
        CodePoint == LRE)
      BidiContexts.push_back(PDF);
    // Close PDF Context. A PDF only closes the innermost context, and only if
    // that context is an embedding/override.
    else if (CodePoint == PDF) {
      if (!BidiContexts.empty() && BidiContexts.back() == PDF)
        BidiContexts.pop_back();
    }
    // Open a PDI Context.
    else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
      BidiContexts.push_back(PDI);
    // Close a PDI Context: drop the innermost isolate together with every
    // context that was opened after it.
    else if (CodePoint == PDI) {
      auto R = std::find(BidiContexts.rbegin(), BidiContexts.rend(), PDI);
      if (R != BidiContexts.rend())
        BidiContexts.resize(BidiContexts.rend() - R - 1);
    }
    // Non-ASCII paragraph separators (PARAGRAPH SEPARATOR and NEXT LINE).
    else if (CodePoint == PS || CodePoint == NEL)
      BidiContexts.clear();
  }
  return !BidiContexts.empty();
}
/// Comment handler that forwards the text of every comment it is given to
/// containsMisleadingBidi() and emits a diagnostic through the owning check
/// when an unterminated bidirectional sequence is found.
class MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
    : public CommentHandler {
  /// The check used to emit diagnostics.
  MisleadingBidirectionalCheck &Check;

public:
  /// \param Check the check that diagnostics are reported through.
  /// \param User  accepted but not used by this handler.
  MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check,
                                 llvm::Optional<std::string> User)
      : Check(Check) {}

  /// Inspects the comment covering \p Range and warns at its beginning if it
  /// contains a misleading bidirectional sequence. Always returns false.
  bool HandleComment(Preprocessor &PP, SourceRange Range) override {
    // FIXME: check that we are in a /* */ comment
    const CharSourceRange CommentRange = CharSourceRange::getCharRange(Range);
    StringRef CommentText = Lexer::getSourceText(
        CommentRange, PP.getSourceManager(), PP.getLangOpts());

    if (!containsMisleadingBidi(CommentText, true))
      return false;

    Check.diag(Range.getBegin(),
               "comment contains misleading bidirectional Unicode characters");
    return false;
  }
};
/// Builds the check and creates the comment handler it owns; the handler is
/// given a reference back to this check and the `User` option of \p Context.
MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
    StringRef Name, ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      Handler(std::make_unique<MisleadingBidirectionalHandler>(
          *this, Context->getOptions().User)) {}

/// Defined in this file, where MisleadingBidirectionalHandler is a complete
/// type, so that the owning pointer member can be destroyed.
MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck() = default;
void MisleadingBidirectionalCheck::registerPPCallbacks(
const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
PP->addCommentHandler(Handler.get());
}
/// Match callback: warns at the start of a matched string literal (bound as
/// "strlit") whose bytes contain an unterminated bidirectional sequence.
void MisleadingBidirectionalCheck::check(
    const ast_matchers::MatchFinder::MatchResult &Result) {
  const auto *MatchedLiteral = Result.Nodes.getNodeAs<StringLiteral>("strlit");
  if (!MatchedLiteral)
    return;

  StringRef LiteralBytes = MatchedLiteral->getBytes();
  if (!containsMisleadingBidi(LiteralBytes, false))
    return;

  diag(MatchedLiteral->getBeginLoc(), "string literal contains misleading "
                                      "bidirectional Unicode characters");
}
/// Registers a matcher for every string literal, bound as "strlit", with this
/// check as the callback.
void MisleadingBidirectionalCheck::registerMatchers(
    ast_matchers::MatchFinder *Finder) {
  const ast_matchers::StatementMatcher AnyStringLiteral =
      ast_matchers::stringLiteral().bind("strlit");
  Finder->addMatcher(AnyStringLiteral, this);
}

View File

@ -0,0 +1,38 @@
//===--- MisleadingBidirectionalCheck.h - clang-tidy ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
#include "../ClangTidyCheck.h"
namespace clang {
namespace tidy {
namespace misc {
/// clang-tidy check declaration for misleading bidirectional Unicode
/// sequences. It hooks into both the preprocessor (registerPPCallbacks) and
/// the AST matchers (registerMatchers / check).
class MisleadingBidirectionalCheck : public ClangTidyCheck {
  // Only forward-declared here; the definition is not part of this header.
  class MisleadingBidirectionalHandler;

  // Owning pointer to the handler.
  std::unique_ptr<MisleadingBidirectionalHandler> Handler;

public:
  MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context);

  // Declared here without a body: MisleadingBidirectionalHandler is an
  // incomplete type in this header.
  ~MisleadingBidirectionalCheck();

  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
  void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
                           Preprocessor *ModuleExpanderPP) override;
};
} // namespace misc
} // namespace tidy
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H

View File

@ -127,6 +127,10 @@ New checks
Reports identifiers whose names are too short. Currently checks local Reports identifiers whose names are too short. Currently checks local
variables and function parameters only. variables and function parameters only.
- New :doc:`misc-misleading-bidirectional <clang-tidy/checks/misc-misleading-bidirectional>` check.
Inspects string literals and comments for unterminated bidirectional Unicode
characters.
New check aliases New check aliases
^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^

View File

@ -212,7 +212,8 @@ Clang-Tidy Checks
`llvmlibc-implementation-in-namespace <llvmlibc-implementation-in-namespace.html>`_, `llvmlibc-implementation-in-namespace <llvmlibc-implementation-in-namespace.html>`_,
`llvmlibc-restrict-system-libc-headers <llvmlibc-restrict-system-libc-headers.html>`_, "Yes" `llvmlibc-restrict-system-libc-headers <llvmlibc-restrict-system-libc-headers.html>`_, "Yes"
`misc-definitions-in-headers <misc-definitions-in-headers.html>`_, "Yes" `misc-definitions-in-headers <misc-definitions-in-headers.html>`_, "Yes"
`misc-misleading-identifier <misc-misleading-identifier.html>`_, `misc-misleading-bidirectional <misc-misleading-bidirectional.html>`_,
`misc-misleading-identifier <misc-misleading-identifier.html>`_,
`misc-misplaced-const <misc-misplaced-const.html>`_, `misc-misplaced-const <misc-misplaced-const.html>`_,
`misc-new-delete-overloads <misc-new-delete-overloads.html>`_, `misc-new-delete-overloads <misc-new-delete-overloads.html>`_,
`misc-no-recursion <misc-no-recursion.html>`_, `misc-no-recursion <misc-no-recursion.html>`_,

View File

@ -0,0 +1,21 @@
.. title:: clang-tidy - misc-misleading-bidirectional
misc-misleading-bidirectional
=============================
Warns about unterminated bidirectional Unicode sequences, detecting potential
attacks as described in the `Trojan Source <https://www.trojansource.codes>`_ paper.
Example:
.. code-block:: c++
#include <iostream>
int main() {
bool isAdmin = false;
/* } if (isAdmin) begin admins only */
std::cout << "You are an admin.\n";
/* end admins only { */
return 0;
}