Revert "[clang] Emit SARIF Diagnostics: Create `clang::SarifDocumentWriter` interface"

This reverts commit 329fae7103.

This should fix failing test bots like:
https://lab.llvm.org/buildbot/#/builders/91/builds/11328
This commit is contained in:
Aaron Ballman 2022-06-30 13:38:42 -04:00
parent 45f3a5aae7
commit b46ad1b5be
5 changed files with 0 additions and 1153 deletions

View File

@ -1,440 +0,0 @@
//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult.
///
/// The document built can be accessed as a JSON Object.
/// Several value semantic types are also introduced which represent properties
/// of the SARIF standard, such as 'artifact', 'result', 'rule'.
///
/// A SARIF (Static Analysis Results Interchange Format) document is a JSON
/// document that describes in detail the results of running static analysis
/// tools on a project. Each (non-trivial) document consists of at least one
/// "run", which are themselves composed of details such as:
/// * Tool: The tool that was run
/// * Rules: The rules applied during the tool run, represented by
/// \c reportingDescriptor objects in SARIF
/// * Results: The matches for the rules applied against the project(s) being
/// evaluated, represented by \c result objects in SARIF
///
/// Reference:
/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html">The SARIF standard</a>
/// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">SARIF<pre>reportingDescriptor</pre></a>
/// 3. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_BASIC_SARIF_H
#define LLVM_CLANG_BASIC_SARIF_H
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/Version.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/JSON.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <string>
namespace clang {
class SarifDocumentWriter;
class SourceManager;
namespace detail {
/// \internal
/// Builder for SARIF's \c artifactLocation object, which describes where an
/// artifact encountered during analysis can be found. Such an object usually
/// carries a URI and/or an index that references the artifact it locates.
///
/// This builder additionally assumes that every artifact \c clang encounters
/// is a physical, top-level artifact, which is why
/// \ref SarifArtifactLocation::create requires a URI. The standard only asks
/// for one of \c URI or \c Index to be present; \c clang always supplies the
/// \c URI, and the index may be attached afterwards with \ref setIndex.
///
/// Reference:
/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317427">artifactLocation object</a>
/// 2. \ref SarifArtifact
class SarifArtifactLocation {
  friend class clang::SarifDocumentWriter;

  /// Index of the located artifact; unset until \ref setIndex is called.
  llvm::Optional<uint32_t> Index;
  /// URI at which the artifact can be found.
  std::string URI;

  SarifArtifactLocation() = delete;
  explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {}

public:
  /// Create a location referring to the artifact found at \p URI.
  static SarifArtifactLocation create(llvm::StringRef URI) {
    return SarifArtifactLocation(URI.str());
  }

  /// Record \p Idx as the index of the located artifact.
  ///
  /// \return A copy of the updated location, so that calls can be chained.
  SarifArtifactLocation setIndex(uint32_t Idx) {
    Index = Idx;
    return *this;
  }
};
/// \internal
/// Builder for SARIF's \c artifact object. An artifact is any object (a
/// sequence of bytes) addressable by a URI (RFC 3986); for clang the most
/// common artifacts are source files. The artifact object is described in
/// section 3.24 of the standard.
///
/// Every clang artifact MUST have a location (there are no nested artifacts),
/// so \ref SarifArtifact::create requires a \ref SarifArtifactLocation.
///
/// Reference:
/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317611">artifact object</a>
class SarifArtifact {
  friend class clang::SarifDocumentWriter;

  /// Optional offset of the artifact; unset unless \ref setOffset is called.
  llvm::Optional<uint32_t> Offset;
  /// Optional length in bytes; unset unless \ref setLength is called.
  llvm::Optional<size_t> Length;
  /// MIME type of the artifact; empty when not specified.
  std::string MimeType;
  /// Where the artifact can be found.
  SarifArtifactLocation Location;
  /// Roles played by the artifact; empty when not specified.
  llvm::SmallVector<std::string, 4> Roles;

  SarifArtifact() = delete;
  explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {}

public:
  /// Create an artifact that lives at \p Loc.
  static SarifArtifact create(const SarifArtifactLocation &Loc) {
    return SarifArtifact(Loc);
  }

  /// Set the artifact's offset. Returns a copy of the updated artifact.
  SarifArtifact setOffset(uint32_t ArtifactOffset) {
    Offset = ArtifactOffset;
    return *this;
  }

  /// Set the artifact's length in bytes. Returns a copy of the updated
  /// artifact.
  SarifArtifact setLength(size_t NumBytes) {
    Length = NumBytes;
    return *this;
  }

  /// Replace the artifact's roles with \p ArtifactRoles. Returns a copy of the
  /// updated artifact.
  SarifArtifact setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles) {
    Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end());
    return *this;
  }

  /// Set the artifact's MIME type. Returns a copy of the updated artifact.
  SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) {
    MimeType = ArtifactMimeType.str();
    return *this;
  }
};
} // namespace detail
/// How important a location within a thread flow is.
enum class ThreadFlowImportance {
  Important,
  Essential,
  Unimportant
};
/// A thread flow is a sequence of code locations that specify a possible path
/// through a single thread of execution. In SARIF it is related to a code
/// flow, which describes the progress of one or more programs through one or
/// more thread flows.
///
/// Each instance of this builder holds a single source range together with
/// its importance and an optional message.
///
/// Reference:
/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317744">threadFlow object</a>
/// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317740">codeFlow object</a>
class ThreadFlow {
  friend class SarifDocumentWriter;

  /// Character range covered by this item.
  CharSourceRange Range;
  /// Importance of this item.
  ThreadFlowImportance Importance;
  /// Message attached to this item; may be empty.
  std::string Message;

  ThreadFlow() = default;

public:
  /// Create an empty thread flow to be filled in through the setters.
  static ThreadFlow create() { return ThreadFlow{}; }

  /// Set the source range of this item.
  ///
  /// \pre \p ItemRange must be a character range (asserted).
  /// \return A copy of the updated thread flow, so that calls can be chained.
  ThreadFlow setRange(const CharSourceRange &ItemRange) {
    assert(ItemRange.isCharRange() &&
           "ThreadFlows require a character granular source range!");
    Range = ItemRange;
    return *this;
  }

  /// Set the importance of this item. Returns a copy of the updated thread
  /// flow.
  ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) {
    Importance = ItemImportance;
    return *this;
  }

  /// Set the message of this item. Returns a copy of the updated thread flow.
  ThreadFlow setMessage(llvm::StringRef ItemMessage) {
    Message = ItemMessage.str();
    return *this;
  }
};
/// A SARIF rule (\c reportingDescriptor object) holds information describing
/// a reporting item generated by a tool. A reporting item is either a result
/// of analysis or a notification of a condition encountered by the tool.
/// Rules are arbitrary, but are identifiable by a hierarchical rule-id.
///
/// Use \ref SarifRule::create to obtain an empty rule and the \c setX(...)
/// methods to populate it; every setter returns a copy of the updated rule so
/// that calls can be chained.
///
/// Reference:
/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">reportingDescriptor object</a>
class SarifRule {
  friend class clang::SarifDocumentWriter;

  std::string Name;
  std::string Id;
  std::string Description;
  /// Help URI for the rule; empty when not specified.
  std::string HelpURI;

  SarifRule() = default;

public:
  /// Create a rule with all fields empty.
  static SarifRule create() { return SarifRule{}; }

  /// Set the rule's name.
  SarifRule setName(llvm::StringRef RuleName) {
    Name = RuleName.str();
    return *this;
  }

  /// Set the rule's identifier.
  SarifRule setRuleId(llvm::StringRef RuleId) {
    Id = RuleId.str();
    return *this;
  }

  /// Set the rule's description.
  SarifRule setDescription(llvm::StringRef RuleDesc) {
    Description = RuleDesc.str();
    return *this;
  }

  /// Set the URI where help for this rule can be found.
  SarifRule setHelpURI(llvm::StringRef RuleHelpURI) {
    HelpURI = RuleHelpURI.str();
    return *this;
  }
};
/// A SARIF result (also called a "reporting item") is a unit of output
/// produced when one of the tool's \c reportingDescriptor encounters a match
/// on the file being analysed by the tool.
///
/// This builder provides a \ref SarifResult::create static method that can be
/// used to create an empty shell onto which attributes can be added using the
/// \c setX(...) methods.
///
/// For example:
/// \code{.cpp}
/// SarifResult result = SarifResult::create(...)
/// .setRuleId(...)
/// .setDiagnosticMessage(...);
/// \endcode
///
/// Reference:
/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
class SarifResult {
friend class clang::SarifDocumentWriter;
// NOTE:
// This type cannot fit all possible indexes representable by JSON, but is
// chosen because it is the largest unsigned type that can be safely
// converted to an \c int64_t.
uint32_t RuleIdx;
std::string RuleId;
std::string DiagnosticMessage;
llvm::SmallVector<CharSourceRange, 8> Locations;
llvm::SmallVector<ThreadFlow, 8> ThreadFlows;
SarifResult() = delete;
explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {}
public:
static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; }
SarifResult setIndex(uint32_t Idx) {
RuleIdx = Idx;
return *this;
}
SarifResult setRuleId(llvm::StringRef Id) {
RuleId = Id.str();
return *this;
}
SarifResult setDiagnosticMessage(llvm::StringRef Message) {
DiagnosticMessage = Message.str();
return *this;
}
SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) {
#ifndef NDEBUG
for (const auto &Loc : DiagLocs) {
assert(Loc.isCharRange() &&
"SARIF Results require character granular source ranges!");
}
#endif
Locations.assign(DiagLocs.begin(), DiagLocs.end());
return *this;
}
SarifResult setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults) {
ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end());
return *this;
}
};
/// This class handles creating a valid SARIF document given various input
/// attributes. Rules and artifacts are buffered per run and written into the
/// run's JSON when the run is ended. It requires an ordering among certain
/// method calls:
///
/// 1. Because every SARIF document must contain at least 1 \c run, callers
/// must ensure that \ref SarifDocumentWriter::createRun is called before
/// any other methods.
/// 2. If SarifDocumentWriter::endRun is called, callers MUST call
/// SarifDocumentWriter::createRun, before invoking any of the result
/// aggregation methods such as SarifDocumentWriter::appendResult etc.
class SarifDocumentWriter {
private:
/// URI of the SARIF JSON schema, emitted as the document's \c $schema.
const llvm::StringRef SchemaURI{
"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/"
"sarif-schema-2.1.0.json"};
/// SARIF version string, emitted as the document's \c version.
const llvm::StringRef SchemaVersion{"2.1.0"};
/// \internal
/// Return a mutable reference to the current tool. Asserts that a run exists.
llvm::json::Object &getCurrentTool();
/// \internal
/// Checks if there is a run associated with this document.
///
/// \return presumably \c true when a run exists.
/// NOTE(review): no definition of this method appears among the member
/// definitions in Sarif.cpp; confirm that it is implemented or remove it.
bool hasRun() const;
/// \internal
/// Reset portions of the internal state so that the document is ready to
/// receive data for a new run.
void reset();
/// \internal
/// Return a mutable reference to the current run, after asserting it exists.
///
/// \note It is undefined behavior to call this if a run does not exist in
/// the SARIF document.
llvm::json::Object &getCurrentRun();
/// Create a code flow object for the given threadflows.
/// See \ref ThreadFlow.
///
/// \note It is undefined behavior to call this if a run does not exist in
/// the SARIF document.
llvm::json::Object
createCodeFlow(const llvm::ArrayRef<ThreadFlow> ThreadFlows);
/// Build the JSON array describing the given threadflows. The physical
/// location of each item is created through \ref createPhysicalLocation.
llvm::json::Array
createThreadFlows(const llvm::ArrayRef<ThreadFlow> ThreadFlows);
/// Create the JSON physical location for the given \ref CharSourceRange,
/// registering the corresponding artifact with the current run if it has not
/// been seen yet.
llvm::json::Object createPhysicalLocation(const CharSourceRange &R);
public:
SarifDocumentWriter() = delete;
/// Create a new empty SARIF document with the given source manager.
SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
/// Release resources held by this SARIF document.
~SarifDocumentWriter() = default;
/// Create a new run with which any upcoming analysis will be associated.
/// Each run requires specifying the tool that is generating reporting items.
/// Any run that is still open is ended first.
void createRun(const llvm::StringRef ShortToolName,
const llvm::StringRef LongToolName,
const llvm::StringRef ToolVersion = CLANG_VERSION_STRING);
/// If there is a current run, end it.
///
/// This method collects various book-keeping required to clear and close
/// resources associated with the current run, but may also allocate some
/// for the next run.
///
/// Calling \ref endRun before associating a run through \ref createRun leads
/// to undefined behaviour.
void endRun();
/// Associate the given rule with the current run.
///
/// Returns an integer rule index for the created rule that is unique within
/// the current run, which can then be used to create a \ref SarifResult
/// to add to the current run. Note that a rule must exist before being
/// referenced by a result.
///
/// \pre
/// There must be a run associated with the document, failing to do so will
/// cause undefined behaviour.
size_t createRule(const SarifRule &Rule);
/// Append a new result to the currently in-flight run.
///
/// \pre
/// There must be a run associated with the document, failing to do so will
/// cause undefined behaviour.
/// \pre
/// \c RuleIdx used to create the result must correspond to a rule known by
/// the SARIF document. It must be the value returned by a previous call
/// to \ref createRule.
void appendResult(const SarifResult &SarifResult);
/// Return the SARIF document in its current state, ending the current run
/// first if one is open.
/// Calling this will trigger a copy of the internal state including all
/// reported diagnostics, resulting in an expensive call.
llvm::json::Object createDocument();
private:
/// Source Manager to use for the current SARIF document.
const SourceManager &SourceMgr;
/// Flag to track the state of this document:
/// A closed document is one on which a new run must be created.
/// This could be a document that is freshly created, or has recently
/// finished writing to a previous run.
bool Closed = true;
/// A sequence of SARIF runs.
/// Each run object describes a single run of an analysis tool and contains
/// the output of that run.
///
/// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317484">run object</a>
llvm::json::Array Runs;
/// The list of rules associated with the most recent active run. These are
/// defined using the diagnostics passed to the SarifDocument. Each rule
/// need not be unique through the result set. E.g. there may be several
/// 'syntax' errors throughout code under analysis, each of which has its
/// own specific diagnostic message (and consequently, RuleId). Rules are
/// also known as "reportingDescriptor" objects in SARIF.
///
/// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317556">rules property</a>
llvm::SmallVector<SarifRule, 32> CurrentRules;
/// The list of artifacts that have been encountered on the most recent active
/// run, keyed by their URI. An artifact is defined in SARIF as a sequence of
/// bytes addressable by a URI. A common example for clang's case would be
/// files named by filesystem paths.
llvm::StringMap<detail::SarifArtifact> CurrentArtifacts;
};
} // namespace clang
#endif // LLVM_CLANG_BASIC_SARIF_H

View File

@ -63,7 +63,6 @@ add_clang_library(clangBasic
NoSanitizeList.cpp NoSanitizeList.cpp
SanitizerSpecialCaseList.cpp SanitizerSpecialCaseList.cpp
Sanitizers.cpp Sanitizers.cpp
Sarif.cpp
SourceLocation.cpp SourceLocation.cpp
SourceManager.cpp SourceManager.cpp
Stack.cpp Stack.cpp

View File

@ -1,391 +0,0 @@
//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of the SarifDocumentWriter class, which
/// makes use of associated builders such as:
/// - \ref SarifArtifact
/// - \ref SarifArtifactLocation
/// - \ref SarifRule
/// - \ref SarifResult
//===----------------------------------------------------------------------===//
#include "clang/Basic/Sarif.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
#include <string>
#include <utility>
using namespace clang;
using namespace llvm;
using clang::detail::SarifArtifact;
using clang::detail::SarifArtifactLocation;
/// Return the name to report for \p FE: its real path name when one is
/// available, otherwise the name stored on the file entry.
static StringRef getFileName(const FileEntry &FE) {
  StringRef RealPath = FE.tryGetRealPathName();
  return RealPath.empty() ? FE.getName() : RealPath;
}
/// \name URI
/// @{
/// \internal
/// \brief
/// Return the RFC3986 encoding of the input character.
///
/// \param C Character to encode to RFC3986.
///
/// \return The RFC3986 representation of \c C.
static std::string percentEncodeURICharacter(char C) {
// RFC 3986 claims alpha, numeric, and this handful of
// characters are not reserved for the path component and
// should be written out directly. Otherwise, percent
// encode the character and write that out instead of the
// reserved character.
if (llvm::isAlnum(C) ||
StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
return std::string(&C, 1);
return "%" + llvm::toHex(StringRef(&C, 1));
}
/// \internal
/// \brief Return a URI representing the given file name.
///
/// The result starts with the \c file:// scheme. The root name of the path
/// (if any) is appended either as the URI authority or as the first path
/// segment, and every remaining path component is appended after a '/' with
/// its characters percent-encoded as needed.
///
/// \param Filename The filename to be represented as URI. It is asserted to
/// have at least one path component.
///
/// \return RFC3986 URI representing the input file name.
static std::string fileNameToURI(StringRef Filename) {
SmallString<32> Ret = StringRef("file://");
// Get the root name to see if it has a URI authority.
StringRef Root = sys::path::root_name(Filename);
if (Root.startswith("//")) {
// There is an authority, so add it to the URI.
Ret += Root.drop_front(2).str();
} else if (!Root.empty()) {
// There is no authority, so end the component and add the root to the URI.
Ret += Twine("/" + Root).str();
}
auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
assert(Iter != End && "Expected there to be a non-root path component.");
// Add the rest of the path components, encoding any reserved characters;
// we skip past the first path component, as it was handled above.
std::for_each(++Iter, End, [&Ret](StringRef Component) {
// For reasons unknown to me, we may get a backslash with Windows native
// paths for the initial backslash following the drive component, which
// we need to ignore as a URI path part.
if (Component == "\\")
return;
// Add the separator between the previous path part and the one being
// currently processed.
Ret += "/";
// URI encode the part.
for (char C : Component) {
Ret += percentEncodeURICharacter(C);
}
});
return std::string(Ret);
}
/// @}
/// \brief Calculate the column position expressed in the number of UTF-8 code
/// points from column start to the source location
///
/// The buffer is walked from the start of the line, advancing by the byte
/// length of each UTF-8 sequence, until the offset of \p Loc (plus
/// \p TokenLen) is reached.
///
/// \param Loc The source location whose column needs to be calculated. Must
/// be valid (asserted).
/// \param TokenLen Optional hint for when the token is multiple bytes long.
///
/// \return The 1-based column number, counted in UTF-8 sequences rather than
/// bytes, of the effective source location.
static unsigned int adjustColumnPos(FullSourceLoc Loc,
unsigned int TokenLen = 0) {
assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
Optional<MemoryBufferRef> Buf =
Loc.getManager().getBufferOrNone(LocInfo.first);
assert(Buf && "got an invalid buffer for the location's file");
assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
"token extends past end of buffer?");
// Adjust the offset to be the start of the line, since we'll be counting
// Unicode characters from there until our column offset.
unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
// Columns are 1-based, so the first character of the line is column 1.
unsigned int Ret = 1;
while (Off < (LocInfo.second + TokenLen)) {
// Step over one whole UTF-8 sequence, using its lead byte to get its size.
Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
Ret++;
}
return Ret;
}
/// \name SARIF Utilities
/// @{
/// \internal
/// Create a SARIF \c message object whose \c text property is \p Text.
///
/// This helper is private to this file, so it is given internal linkage like
/// the other helpers here rather than being exported as a global symbol.
static json::Object createMessage(StringRef Text) {
  return json::Object{{"text", Text.str()}};
}
/// \internal
/// Create a SARIF \c region object describing the text covered by \p R.
///
/// The region always carries \c startLine, \c startColumn and \c endColumn;
/// \c endLine is added only when the begin and end locations differ.
///
/// NOTE(review): earlier documentation stated that \p R must be a token
/// range, but the only caller in this file asserts a character range before
/// calling; confirm which precondition is intended.
static json::Object createTextRegion(const SourceManager &SM,
                                     const CharSourceRange &R) {
  FullSourceLoc BeginLoc(R.getBegin(), SM);
  FullSourceLoc EndLoc(R.getEnd(), SM);

  json::Object Region{{"startLine", BeginLoc.getExpansionLineNumber()},
                      {"startColumn", adjustColumnPos(BeginLoc)},
                      {"endColumn", adjustColumnPos(EndLoc)}};

  const bool HasDistinctEnd = BeginLoc != EndLoc;
  if (HasDistinctEnd)
    Region["endLine"] = EndLoc.getExpansionLineNumber();

  return Region;
}
/// Wrap \p PhysicalLocation in a SARIF \c location object, attaching
/// \p Message as its \c message when the message is not empty.
static json::Object createLocation(json::Object &&PhysicalLocation,
                                   StringRef Message = "") {
  json::Object Location{{"physicalLocation", std::move(PhysicalLocation)}};
  if (Message.empty())
    return Location;

  Location.insert({"message", createMessage(Message)});
  return Location;
}
/// Return the string used in the SARIF output for the importance \p I.
static StringRef importanceToStr(ThreadFlowImportance I) {
  switch (I) {
  case ThreadFlowImportance::Unimportant:
    return "unimportant";
  case ThreadFlowImportance::Essential:
    return "essential";
  case ThreadFlowImportance::Important:
    return "important";
  }
  // Only reachable if I holds a value outside the enumeration.
  llvm_unreachable("Fully covered switch is not so fully covered");
}
/// Create a SARIF \c threadFlowLocation object from \p Location and the
/// string form of \p Importance.
static json::Object
createThreadFlowLocation(json::Object &&Location,
                         const ThreadFlowImportance &Importance) {
  json::Object FlowLocation{{"location", std::move(Location)},
                            {"importance", importanceToStr(Importance)}};
  return FlowLocation;
}
/// @}
/// Create the SARIF \c physicalLocation object for the range \p R.
///
/// The file containing the start of \p R is looked up by its URI among the
/// artifacts of the current run; if it is not known yet, a new artifact is
/// registered with the next free index. The returned object refers to the
/// artifact by that index and carries the text region of \p R.
///
/// \pre \p R is a valid character range located in a file (all asserted).
json::Object
SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
assert(R.isValid() &&
"Cannot create a physicalLocation from invalid SourceRange!");
assert(R.isCharRange() &&
"Cannot create a physicalLocation from a token range!");
FullSourceLoc Start{R.getBegin(), SourceMgr};
const FileEntry *FE = Start.getExpansionLoc().getFileEntry();
assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
const std::string &FileURI = fileNameToURI(getFileName(*FE));
auto I = CurrentArtifacts.find(FileURI);
if (I == CurrentArtifacts.end()) {
// First time this file is seen in the current run: its index is the number
// of artifacts registered so far.
uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
const SarifArtifactLocation &Location =
SarifArtifactLocation::create(FileURI).setIndex(Idx);
const SarifArtifact &Artifact = SarifArtifact::create(Location)
.setRoles({"resultFile"})
.setLength(FE->getSize())
.setMimeType("text/plain");
auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
// If inserted, ensure the original iterator points to the newly inserted
// element, so it can be used downstream.
if (StatusIter.second)
I = StatusIter.first;
}
assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
const SarifArtifactLocation &Location = I->second.Location;
// Artifacts registered above always have an index set.
uint32_t Idx = Location.Index.getValue();
return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}},
{"region", createTextRegion(SourceMgr, R)}}};
}
/// Return the \c tool object of the most recently created run.
///
/// \pre The document is open and has at least one run (both asserted).
json::Object &SarifDocumentWriter::getCurrentTool() {
  assert(!Closed && "SARIF Document is closed. "
                    "Need to call createRun() before using getcurrentTool!");

  // An open document always has at least one run; anything else is an
  // invalid state.
  assert(!Runs.empty() && "There are no runs associated with the document!");

  json::Object *CurrentRun = Runs.back().getAsObject();
  json::Value *ToolEntry = CurrentRun->get("tool");
  return *ToolEntry->getAsObject();
}
/// Drop the rules and artifacts buffered for the current run.
void SarifDocumentWriter::reset() {
  CurrentArtifacts.clear();
  CurrentRules.clear();
}
void SarifDocumentWriter::endRun() {
// Exit early if trying to close a closed Document.
if (Closed) {
reset();
return;
}
// Since Closed = false here, expect there to be at least 1 Run, anything
// else is an invalid state.
assert(!Runs.empty() && "There are no runs associated with the document!");
// Flush all the rules.
json::Object &Tool = getCurrentTool();
json::Array Rules;
for (const SarifRule &R : CurrentRules) {
json::Object Rule{
{"name", R.Name},
{"id", R.Id},
{"fullDescription", json::Object{{"text", R.Description}}}};
if (!R.HelpURI.empty())
Rule["helpUri"] = R.HelpURI;
Rules.emplace_back(std::move(Rule));
}
json::Object &Driver = *Tool.getObject("driver");
Driver["rules"] = std::move(Rules);
// Flush all the artifacts.
json::Object &Run = getCurrentRun();
json::Array *Artifacts = Run.getArray("artifacts");
for (const auto &Pair : CurrentArtifacts) {
const SarifArtifact &A = Pair.getValue();
json::Object Loc{{"uri", A.Location.URI}};
if (A.Location.Index.hasValue()) {
Loc["index"] = static_cast<int64_t>(A.Location.Index.getValue());
}
json::Object Artifact;
Artifact["location"] = std::move(Loc);
if (A.Length.hasValue())
Artifact["length"] = static_cast<int64_t>(A.Length.getValue());
if (!A.Roles.empty())
Artifact["roles"] = json::Array(A.Roles);
if (!A.MimeType.empty())
Artifact["mimeType"] = A.MimeType;
if (A.Offset.hasValue())
Artifact["offset"] = A.Offset;
Artifacts->push_back(json::Value(std::move(Artifact)));
}
// Clear, reset temporaries before next run.
reset();
// Mark the document as closed.
Closed = true;
}
/// Build the JSON array for the given thread flows.
///
/// The returned array holds a single object whose \c locations property
/// lists one thread flow location per element of \p ThreadFlows.
json::Array
SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
  json::Array FlowLocations;
  for (const auto &Item : ThreadFlows) {
    json::Object Physical = createPhysicalLocation(Item.Range);
    json::Object Location = createLocation(std::move(Physical), Item.Message);
    FlowLocations.emplace_back(
        createThreadFlowLocation(std::move(Location), Item.Importance));
  }

  json::Object Flow{{"locations", std::move(FlowLocations)}};
  return json::Array{std::move(Flow)};
}
/// Create a SARIF \c codeFlow object whose \c threadFlows property describes
/// the given thread flows.
json::Object
SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
  json::Object CodeFlow{{"threadFlows", createThreadFlows(ThreadFlows)}};
  return CodeFlow;
}
/// Start a new run for the tool described by the given names and version.
///
/// Any run that is still open is ended first. The new run starts with empty
/// \c results and \c artifacts and is appended to the document's runs.
void SarifDocumentWriter::createRun(StringRef ShortToolName,
                                    StringRef LongToolName,
                                    StringRef ToolVersion) {
  // Flush and clear whatever belongs to a previous run.
  endRun();

  // From here on the document is open for a new run.
  Closed = false;

  json::Object Driver{
      {"name", ShortToolName},
      {"fullName", LongToolName},
      {"language", "en-US"},
      {"version", ToolVersion},
      {"informationUri", "https://clang.llvm.org/docs/UsersManual.html"}};
  json::Object Tool{{"driver", std::move(Driver)}};

  json::Object NewRun{{"tool", std::move(Tool)},
                      {"results", {}},
                      {"artifacts", {}},
                      {"columnKind", "unicodeCodePoints"}};
  Runs.emplace_back(std::move(NewRun));
}
/// Return the JSON object of the most recently created run.
///
/// \pre The document is open and has at least one run (both asserted).
json::Object &SarifDocumentWriter::getCurrentRun() {
  assert(!Closed &&
         "SARIF Document is closed. "
         "Can only getCurrentRun() if document is opened via createRun(), "
         "create a run first");

  // An open document always has at least one run; anything else is an
  // invalid state.
  assert(!Runs.empty() && "There are no runs associated with the document!");

  json::Object *CurrentRun = Runs.back().getAsObject();
  return *CurrentRun;
}
/// Buffer \p Rule for the current run.
///
/// \return The index of the newly added rule among the buffered rules.
size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
  CurrentRules.push_back(Rule);
  return CurrentRules.size() - 1;
}
void SarifDocumentWriter::appendResult(const SarifResult &Result) {
size_t RuleIdx = Result.RuleIdx;
assert(RuleIdx < CurrentRules.size() &&
"Trying to reference a rule that doesn't exist");
json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
{"ruleIndex", static_cast<int64_t>(RuleIdx)},
{"ruleId", CurrentRules[RuleIdx].Id}};
if (!Result.Locations.empty()) {
json::Array Locs;
for (auto &Range : Result.Locations) {
Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
}
Ret["locations"] = std::move(Locs);
}
if (!Result.ThreadFlows.empty())
Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
json::Object &Run = getCurrentRun();
json::Array *Results = Run.getArray("results");
Results->emplace_back(std::move(Ret));
}
/// Return the SARIF document in its current state.
///
/// Any open run is ended first so that buffered data is flushed. The \c runs
/// property holds a copy of the runs and is only present when there is at
/// least one run.
json::Object SarifDocumentWriter::createDocument() {
  // Flush all temporaries to their destinations if needed.
  endRun();

  json::Object Document{
      {"$schema", SchemaURI},
      {"version", SchemaVersion},
  };
  if (Runs.empty())
    return Document;

  Document["runs"] = json::Array(Runs);
  return Document;
}

View File

@ -10,7 +10,6 @@ add_clang_unittest(BasicTests
FileManagerTest.cpp FileManagerTest.cpp
LineOffsetMappingTest.cpp LineOffsetMappingTest.cpp
SanitizersTest.cpp SanitizersTest.cpp
SarifTest.cpp
SourceManagerTest.cpp SourceManagerTest.cpp
) )

View File

@ -1,320 +0,0 @@
//===- unittests/Basic/SarifTest.cpp - Test writing SARIF documents -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/Sarif.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "gmock/gmock.h"
#include "gtest/gtest-death-test.h"
#include "gtest/gtest-matchers.h"
#include "gtest/gtest.h"
#include <algorithm>
using namespace clang;
namespace {
using LineCol = std::pair<unsigned int, unsigned int>;
/// Serialize \p Doc to a string using the "{0}" format of llvm::formatv.
static std::string serializeSarifDocument(llvm::json::Object &&Doc) {
  std::string Buffer;
  llvm::raw_string_ostream Stream(Buffer);
  llvm::json::Value Root(std::move(Doc));
  Stream << llvm::formatv("{0}", Root);
  Stream.flush();
  return Buffer;
}
/// Test fixture providing a SourceManager backed by an in-memory file system,
/// plus helpers to register source text and to build character source ranges.
class SarifDocumentWriterTest : public ::testing::Test {
protected:
// Members are initialized in declaration order; each later member is built
// from the earlier ones.
SarifDocumentWriterTest()
: InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem),
FileMgr(FileSystemOptions(), InMemoryFileSystem),
DiagID(new DiagnosticIDs()), DiagOpts(new DiagnosticOptions()),
Diags(DiagID, DiagOpts.get(), new IgnoringDiagConsumer()),
SourceMgr(Diags, FileMgr) {}
IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem;
FileManager FileMgr;
IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
DiagnosticsEngine Diags;
SourceManager SourceMgr;
LangOptions LangOpts;
/// Register \p SourceText as the contents of a virtual file called \p Name
/// and return its FileID. When \p IsMainFile is true the file is also set as
/// the main file of the source manager.
FileID registerSource(llvm::StringRef Name, const char *SourceText,
bool IsMainFile = false) {
std::unique_ptr<llvm::MemoryBuffer> SourceBuf =
llvm::MemoryBuffer::getMemBuffer(SourceText);
const FileEntry *SourceFile =
FileMgr.getVirtualFile(Name, SourceBuf->getBufferSize(), 0);
SourceMgr.overrideFileContents(SourceFile, std::move(SourceBuf));
FileID FID = SourceMgr.getOrCreateFileID(SourceFile, SrcMgr::C_User);
if (IsMainFile)
SourceMgr.setMainFileID(FID);
return FID;
}
/// Build a character range (not a token range) in file \p FID from the
/// (line, column) pair \p Begin to the (line, column) pair \p End.
CharSourceRange getFakeCharSourceRange(FileID FID, LineCol Begin,
LineCol End) {
auto BeginLoc = SourceMgr.translateLineCol(FID, Begin.first, Begin.second);
auto EndLoc = SourceMgr.translateLineCol(FID, End.first, End.second);
return CharSourceRange{SourceRange{BeginLoc, EndLoc}, /* ITR = */ false};
}
};
// Test that a document without any run only contains the schema and version
TEST_F(SarifDocumentWriterTest, createEmptyDocument) {
  // GIVEN:
  SarifDocumentWriter Writer{SourceMgr};

  // WHEN:
  const llvm::json::Object &EmptyDoc = Writer.createDocument();
  std::vector<StringRef> Keys(EmptyDoc.size());
  // Take each entry by reference and convert its key to a StringRef directly,
  // so the collected keys refer to the keys stored in EmptyDoc instead of to
  // a temporary copy of the entry.
  std::transform(EmptyDoc.begin(), EmptyDoc.end(), Keys.begin(),
                 [](const auto &Item) -> StringRef { return Item.getFirst(); });

  // THEN:
  ASSERT_THAT(Keys, testing::UnorderedElementsAre("$schema", "version"));
}
// Test that a newly inserted run will associate correct tool names
TEST_F(SarifDocumentWriterTest, documentWithARun) {
  // GIVEN:
  SarifDocumentWriter Writer{SourceMgr};
  const char *ShortName = "sariftest";
  const char *LongName = "sarif writer test";

  // WHEN:
  Writer.createRun(ShortName, LongName);
  Writer.endRun();
  const llvm::json::Object &Doc = Writer.createDocument();
  const llvm::json::Array *Runs = Doc.getArray("runs");

  // THEN:
  // A run was created
  ASSERT_THAT(Runs, testing::NotNull());

  // It is the only run
  ASSERT_EQ(Runs->size(), 1UL);

  // The tool associated with the run was the tool
  const llvm::json::Object *Run = Runs->begin()->getAsObject();
  const llvm::json::Object *Tool = Run->getObject("tool");
  const llvm::json::Object *Driver = Tool->getObject("driver");
  ASSERT_THAT(Driver, testing::NotNull());

  const auto Name = Driver->getString("name");
  const auto FullName = Driver->getString("fullName");
  const auto Language = Driver->getString("language");
  ASSERT_TRUE(Name.hasValue());
  ASSERT_TRUE(FullName.hasValue());
  ASSERT_TRUE(Language.hasValue());

  EXPECT_EQ(Name.getValue(), ShortName);
  EXPECT_EQ(FullName.getValue(), LongName);
  EXPECT_EQ(Language.getValue(), "en-US");
}
// Test adding result without a run causes a crash
TEST_F(SarifDocumentWriterTest, addingResultsWillCrashIfThereIsNoRun) {
#if defined(NDEBUG) || !GTEST_HAS_DEATH_TEST
  // NOTE(review): the expected text below looks like an assertion message, and
  // assertions are compiled out when NDEBUG is defined, so the death cannot be
  // relied upon in such builds -- confirm against the writer implementation.
  GTEST_SKIP() << "This death test is only available for debug builds.";
#else
  // GIVEN:
  SarifDocumentWriter Writer{SourceMgr};

  // WHEN:
  //  A SarifDocumentWriter::createRun(...) was not called prior to
  //  SarifDocumentWriter::appendResult(...)
  // But a rule exists
  auto RuleIdx = Writer.createRule(SarifRule::create());
  const SarifResult EmptyResult = SarifResult::create(RuleIdx);

  // THEN:
  ASSERT_DEATH({ Writer.appendResult(EmptyResult); }, ".*create a run first.*");
#endif
}
// Test adding rule and result shows up in the final document
TEST_F(SarifDocumentWriterTest, addResultWithValidRuleIsOk) {
  // GIVEN:
  SarifDocumentWriter Writer{SourceMgr};
  // Held by value: binding a reference to the result of a setter chain that
  // starts from a temporary dangles if the setters return a reference.
  const SarifRule Rule =
      SarifRule::create()
          .setRuleId("clang.unittest")
          .setDescription("Example rule created during unit tests")
          .setName("clang unit test");

  // WHEN:
  Writer.createRun("sarif test", "sarif test runner");
  unsigned RuleIdx = Writer.createRule(Rule);
  const SarifResult Result = SarifResult::create(RuleIdx);

  Writer.appendResult(Result);
  const llvm::json::Object &Doc = Writer.createDocument();

  // THEN:
  // A document with a valid schema and version exists
  ASSERT_THAT(Doc.get("$schema"), ::testing::NotNull());
  ASSERT_THAT(Doc.get("version"), ::testing::NotNull());
  const llvm::json::Array *Runs = Doc.getArray("runs");

  // A run exists on this document
  ASSERT_THAT(Runs, ::testing::NotNull());
  ASSERT_EQ(Runs->size(), 1UL);
  const llvm::json::Object *TheRun = Runs->back().getAsObject();
  ASSERT_THAT(TheRun, ::testing::NotNull());

  // The run has slots for tools, results and artifacts
  ASSERT_THAT(TheRun->get("tool"), ::testing::NotNull());
  ASSERT_THAT(TheRun->get("results"), ::testing::NotNull());
  ASSERT_THAT(TheRun->get("artifacts"), ::testing::NotNull());

  // Every pointer is checked before use so that a malformed document fails an
  // assertion instead of dereferencing a null pointer.
  const llvm::json::Object *Tool = TheRun->getObject("tool");
  ASSERT_THAT(Tool, ::testing::NotNull());
  const llvm::json::Object *Driver = Tool->getObject("driver");
  ASSERT_THAT(Driver, ::testing::NotNull());
  const llvm::json::Array *Results = TheRun->getArray("results");
  ASSERT_THAT(Results, ::testing::NotNull());
  const llvm::json::Array *Artifacts = TheRun->getArray("artifacts");
  ASSERT_THAT(Artifacts, ::testing::NotNull());

  // The tool is as expected
  ASSERT_TRUE(Driver->getString("name").hasValue());
  ASSERT_TRUE(Driver->getString("fullName").hasValue());

  EXPECT_EQ(Driver->getString("name").getValue(), "sarif test");
  EXPECT_EQ(Driver->getString("fullName").getValue(), "sarif test runner");

  // The results are as expected
  EXPECT_EQ(Results->size(), 1UL);

  // The artifacts are as expected
  EXPECT_TRUE(Artifacts->empty());
}
// Check that a run holding one rule and one location-less result serializes to
// the expected SARIF text.
TEST_F(SarifDocumentWriterTest, checkSerializingResults) {
  // GIVEN:
  const std::string ExpectedOutput =
      R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[],"columnKind":"unicodeCodePoints","results":[{"message":{"text":""},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})";

  SarifDocumentWriter Writer{SourceMgr};
  // Held by value: binding a reference to the result of a setter chain that
  // starts from a temporary dangles if the setters return a reference.
  const SarifRule Rule =
      SarifRule::create()
          .setRuleId("clang.unittest")
          .setDescription("Example rule created during unit tests")
          .setName("clang unit test");

  // WHEN: A run contains a result
  Writer.createRun("sarif test", "sarif test runner", "1.0.0");
  unsigned RuleIdx = Writer.createRule(Rule);
  const SarifResult Result = SarifResult::create(RuleIdx);
  Writer.appendResult(Result);
  std::string Output = serializeSarifDocument(Writer.createDocument());

  // THEN:
  ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput));
}
// Check that serializing artifacts from results produces valid SARIF
TEST_F(SarifDocumentWriterTest, checkSerializingArtifacts) {
  // GIVEN:
  const std::string ExpectedOutput =
      R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":40,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":14,"startColumn":14,"startLine":3}}}],"message":{"text":"expected ';' after top level declarator"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})";

  SarifDocumentWriter Writer{SourceMgr};
  // Held by value: binding a reference to the result of a setter chain that
  // starts from a temporary dangles if the setters return a reference.
  const SarifRule Rule =
      SarifRule::create()
          .setRuleId("clang.unittest")
          .setDescription("Example rule created during unit tests")
          .setName("clang unit test");

  // WHEN: A result is added with valid source locations for its diagnostics
  Writer.createRun("sarif test", "sarif test runner", "1.0.0");
  unsigned RuleIdx = Writer.createRule(Rule);

  llvm::SmallVector<CharSourceRange, 1> DiagLocs;
  const char *SourceText = "int foo = 0;\n"
                           "int bar = 1;\n"
                           "float x = 0.0\n";

  FileID MainFileID =
      registerSource("/main.cpp", SourceText, /* IsMainFile = */ true);
  CharSourceRange SourceCSR =
      getFakeCharSourceRange(MainFileID, {3, 14}, {3, 14});

  DiagLocs.push_back(SourceCSR);

  // Held by value for the same reason as Rule above.
  const SarifResult Result =
      SarifResult::create(RuleIdx).setLocations(DiagLocs).setDiagnosticMessage(
          "expected ';' after top level declarator");
  Writer.appendResult(Result);
  std::string Output = serializeSarifDocument(Writer.createDocument());

  // THEN: Assert that the serialized SARIF is as expected
  ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput));
}
// Check that a result carrying thread flows spread over several header files,
// plus a diagnostic location in the main file, serializes to the expected SARIF
// text.
TEST_F(SarifDocumentWriterTest, checkSerializingCodeflows) {
  // GIVEN:
  const std::string ExpectedOutput =
      R"({"$schema":"https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json","runs":[{"artifacts":[{"length":27,"location":{"index":1,"uri":"file:///test-header-1.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":30,"location":{"index":2,"uri":"file:///test-header-2.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":28,"location":{"index":3,"uri":"file:///test-header-3.h"},"mimeType":"text/plain","roles":["resultFile"]},{"length":41,"location":{"index":0,"uri":"file:///main.cpp"},"mimeType":"text/plain","roles":["resultFile"]}],"columnKind":"unicodeCodePoints","results":[{"codeFlows":[{"threadFlows":[{"locations":[{"importance":"essential","location":{"message":{"text":"Message #1"},"physicalLocation":{"artifactLocation":{"index":1},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"important","location":{"message":{"text":"Message #2"},"physicalLocation":{"artifactLocation":{"index":2},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}},{"importance":"unimportant","location":{"message":{"text":"Message #3"},"physicalLocation":{"artifactLocation":{"index":3},"region":{"endColumn":8,"endLine":2,"startColumn":1,"startLine":1}}}}]}]}],"locations":[{"physicalLocation":{"artifactLocation":{"index":0},"region":{"endColumn":8,"endLine":2,"startColumn":5,"startLine":2}}}],"message":{"text":"Redefinition of 'foo'"},"ruleId":"clang.unittest","ruleIndex":0}],"tool":{"driver":{"fullName":"sarif test runner","informationUri":"https://clang.llvm.org/docs/UsersManual.html","language":"en-US","name":"sarif test","rules":[{"fullDescription":{"text":"Example rule created during unit tests"},"id":"clang.unittest","name":"clang unit test"}],"version":"1.0.0"}}}],"version":"2.1.0"})";

  const char *SourceText = "int foo = 0;\n"
                           "int foo = 1;\n"
                           "float x = 0.0;\n";
  FileID MainFileID =
      registerSource("/main.cpp", SourceText, /* IsMainFile = */ true);
  CharSourceRange DiagLoc{getFakeCharSourceRange(MainFileID, {2, 5}, {2, 8})};

  SarifDocumentWriter Writer{SourceMgr};
  // Held by value: binding a reference to the result of a setter chain that
  // starts from a temporary dangles if the setters return a reference.
  const SarifRule Rule =
      SarifRule::create()
          .setRuleId("clang.unittest")
          .setDescription("Example rule created during unit tests")
          .setName("clang unit test");

  constexpr unsigned int NUM_CASES = 3;
  llvm::SmallVector<ThreadFlow, NUM_CASES> Threadflows;
  const char *HeaderTexts[NUM_CASES]{("#pragma once\n"
                                      "#include <foo>"),
                                     ("#ifndef FOO\n"
                                      "#define FOO\n"
                                      "#endif"),
                                     ("#ifdef FOO\n"
                                      "#undef FOO\n"
                                      "#endif")};
  const char *HeaderNames[NUM_CASES]{"/test-header-1.h", "/test-header-2.h",
                                     "/test-header-3.h"};
  ThreadFlowImportance Importances[NUM_CASES]{
      ThreadFlowImportance::Essential, ThreadFlowImportance::Important,
      ThreadFlowImportance::Unimportant};

  // One thread flow item per header, each with its own message and importance.
  for (size_t Idx = 0; Idx != NUM_CASES; ++Idx) {
    FileID FID = registerSource(HeaderNames[Idx], HeaderTexts[Idx]);
    CharSourceRange CSR = getFakeCharSourceRange(FID, {1, 1}, {2, 8});
    std::string Message = llvm::formatv("Message #{0}", Idx + 1);
    ThreadFlow Item = ThreadFlow::create()
                          .setRange(CSR)
                          .setImportance(Importances[Idx])
                          .setMessage(Message);
    Threadflows.push_back(Item);
  }

  // WHEN: A result containing code flows and diagnostic locations is added
  Writer.createRun("sarif test", "sarif test runner", "1.0.0");
  unsigned RuleIdx = Writer.createRule(Rule);
  // Held by value for the same reason as Rule above.
  const SarifResult Result = SarifResult::create(RuleIdx)
                                 .setLocations({DiagLoc})
                                 .setDiagnosticMessage("Redefinition of 'foo'")
                                 .setThreadFlows(Threadflows);
  Writer.appendResult(Result);
  std::string Output = serializeSarifDocument(Writer.createDocument());

  // THEN: Assert that the serialized SARIF is as expected
  ASSERT_THAT(Output, ::testing::StrEq(ExpectedOutput));
}
} // namespace