2019-03-06 04:45:17 +08:00
|
|
|
//===- RemarkParser.cpp --------------------------------------------------===//
|
2018-10-11 02:43:42 +08:00
|
|
|
//
|
2019-01-19 18:56:40 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2018-10-11 02:43:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file provides utility methods used by clients that want to use the
|
2019-03-06 04:45:17 +08:00
|
|
|
// parser for remark diagnostics in LLVM.
|
2018-10-11 02:43:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-03-20 05:11:07 +08:00
|
|
|
#include "llvm/Remarks/RemarkParser.h"
|
2019-09-10 01:43:50 +08:00
|
|
|
#include "BitstreamRemarkParser.h"
|
2019-03-20 05:11:07 +08:00
|
|
|
#include "YAMLRemarkParser.h"
|
2019-03-06 04:45:17 +08:00
|
|
|
#include "llvm-c/Remarks.h"
|
2018-10-11 02:43:42 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2019-03-20 05:11:07 +08:00
|
|
|
#include "llvm/Support/CBindingWrapping.h"
|
2018-10-11 02:43:42 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
2019-03-20 05:11:07 +08:00
|
|
|
using namespace llvm::remarks;
|
2018-10-11 02:43:42 +08:00
|
|
|
|
2019-07-16 23:25:05 +08:00
|
|
|
// Out-of-line definition of the static EndOfFileError::ID member.
// NOTE(review): presumably this is the class identifier that LLVM's Error
// machinery keys on for checks such as E.isA<EndOfFileError>() in
// LLVMRemarkParserGetNext; the stored value 0 looks irrelevant, confirm
// against llvm/Support/Error.h.
char EndOfFileError::ID = 0;
|
2019-03-20 02:21:43 +08:00
|
|
|
|
[Remarks] Add string deduplication using a string table
* Add support for uniquing strings in the remark streamer and emitting the string table in the remarks section.
* Add parsing support for the string table in the RemarkParser.
From this remark:
```
--- !Missed
Pass: inline
Name: NoDefinition
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
Line: 7, Column: 3 }
Function: printArgsNoRet
Args:
- Callee: printf
- String: ' will not be inlined into '
- Caller: printArgsNoRet
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
Line: 6, Column: 0 }
- String: ' because its definition is unavailable'
...
```
to:
```
--- !Missed
Pass: 0
Name: 1
DebugLoc: { File: 3, Line: 7, Column: 3 }
Function: 2
Args:
- Callee: 4
- String: 5
- Caller: 2
DebugLoc: { File: 3, Line: 6, Column: 0 }
- String: 6
...
```
And the string table in the .remarks/__remarks section containing:
```
inline\0NoDefinition\0printArgsNoRet\0
test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c\0printf\0
will not be inlined into \0 because its definition is unavailable\0
```
This is mostly supposed to be used for testing purposes, but it gives us
a 2x reduction in the remark size, and is an incremental change for the
updates to the remarks file format.
Differential Revision: https://reviews.llvm.org/D60227
llvm-svn: 359050
2019-04-24 08:06:24 +08:00
|
|
|
/// Index the '\0'-separated strings contained in \p InBuffer.
///
/// The buffer itself is only referenced, not copied: for every entry we
/// remember where it starts, expressed as an offset from the beginning of the
/// buffer.
ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
  // Remaining always begins at the first byte of the next unread entry.
  for (StringRef Remaining = InBuffer; !Remaining.empty();) {
    // Only the distance from the start of the buffer is stored.
    Offsets.push_back(Remaining.data() - Buffer.data());
    // Drop the current entry together with the '\0' byte that ends it.
    Remaining = Remaining.split('\0').second;
  }
}
|
|
|
|
|
2019-07-04 08:30:58 +08:00
|
|
|
/// Look up the string stored at position \p Index in the table.
///
/// \param Index zero-based position of the requested string.
/// \returns a view of the string inside the underlying buffer, without its
/// '\0' separator, or an invalid_argument error when \p Index is not smaller
/// than the number of strings in the table.
Expected<StringRef> ParsedStringTable::operator[](size_t Index) const {
  if (Index >= Offsets.size())
    return createStringError(
        std::make_error_code(std::errc::invalid_argument),
        // Both values are size_t, which needs %zu; %u is a mismatched
        // conversion wherever size_t is wider than unsigned.
        "String with index %zu is out of bounds (size = %zu).", Index,
        Offsets.size());

  const size_t Offset = Offsets[Index];

  // Any entry but the last one ends right before the '\0' that precedes the
  // next entry.
  if (Index + 1 != Offsets.size())
    return StringRef(Buffer.data() + Offset, Offsets[Index + 1] - Offset - 1);

  // If it's the last offset, we can't use the next offset to know the size of
  // the string: it runs to the end of the buffer. Only drop the final byte
  // when it really is a '\0' terminator, so that a buffer without a trailing
  // '\0' does not lose the last character of its last string. The buffer is
  // never empty here, since Offsets has at least one element.
  size_t End = Buffer.size();
  if (Buffer.back() == '\0')
    --End;
  return StringRef(Buffer.data() + Offset, End - Offset);
}
|
|
|
|
|
2019-07-25 08:16:56 +08:00
|
|
|
/// Create a remark parser for \p Buf when no string table is provided.
///
/// \returns a YAML or bitstream parser for the matching \p ParserFormat, or an
/// invalid_argument error for Format::YAMLStrTab (which needs a string table)
/// and Format::Unknown.
Expected<std::unique_ptr<RemarkParser>>
llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) {
  // Every failure below is reported with the same error code.
  const std::error_code InvalidArgument =
      std::make_error_code(std::errc::invalid_argument);

  switch (ParserFormat) {
  case Format::Unknown:
    return createStringError(InvalidArgument, "Unknown remark parser format.");
  case Format::YAMLStrTab:
    return createStringError(
        InvalidArgument,
        "The YAML with string table format requires a parsed string table.");
  case Format::YAML:
    return std::make_unique<YAMLRemarkParser>(Buf);
  case Format::Bitstream:
    return std::make_unique<BitstreamRemarkParser>(Buf);
  }
  llvm_unreachable("unhandled ParseFormat");
}
|
|
|
|
|
2019-07-25 08:16:56 +08:00
|
|
|
/// Create a remark parser for \p Buf that resolves strings through \p StrTab.
///
/// \returns a yaml-strtab or bitstream parser owning \p StrTab, or an
/// invalid_argument error for Format::YAML (which cannot take a string table)
/// and Format::Unknown.
Expected<std::unique_ptr<RemarkParser>>
llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf,
                                  ParsedStringTable StrTab) {
  // Every failure below is reported with the same error code.
  const std::error_code InvalidArgument =
      std::make_error_code(std::errc::invalid_argument);

  switch (ParserFormat) {
  case Format::Unknown:
    return createStringError(InvalidArgument, "Unknown remark parser format.");
  case Format::YAML:
    return createStringError(InvalidArgument,
                             "The YAML format can't be used with a string "
                             "table. Use yaml-strtab instead.");
  case Format::YAMLStrTab:
    return std::make_unique<YAMLStrTabRemarkParser>(Buf, std::move(StrTab));
  case Format::Bitstream:
    return std::make_unique<BitstreamRemarkParser>(Buf, std::move(StrTab));
  }
  llvm_unreachable("unhandled ParseFormat");
}
|
|
|
|
|
2019-07-27 05:02:02 +08:00
|
|
|
/// Create a remark parser for a buffer that starts with remark metadata.
///
/// The request is forwarded to the YAML or bitstream metadata-aware factory
/// together with the optional string table and the optional path to prepend to
/// an external remark file.
///
/// \returns whatever the selected factory returns, or an invalid_argument
/// error for Format::Unknown.
Expected<std::unique_ptr<RemarkParser>>
llvm::remarks::createRemarkParserFromMeta(
    Format ParserFormat, StringRef Buf, Optional<ParsedStringTable> StrTab,
    Optional<StringRef> ExternalFilePrependPath) {
  switch (ParserFormat) {
  case Format::Unknown:
    return createStringError(std::make_error_code(std::errc::invalid_argument),
                             "Unknown remark parser format.");
  case Format::Bitstream:
    return createBitstreamParserFromMeta(Buf, std::move(StrTab),
                                         std::move(ExternalFilePrependPath));
  // Both YAML flavours share one entry point: depending on the metadata, the
  // format can be either yaml or yaml-strtab, regardless of the input
  // argument.
  case Format::YAML:
  case Format::YAMLStrTab:
    return createYAMLParserFromMeta(Buf, std::move(StrTab),
                                    std::move(ExternalFilePrependPath));
  }
  llvm_unreachable("unhandled ParseFormat");
}
|
|
|
|
|
2019-08-24 03:59:23 +08:00
|
|
|
namespace {
|
2019-07-16 23:25:05 +08:00
|
|
|
// Wrapper that holds the state needed to interact with the C API.
|
|
|
|
struct CParser {
|
2019-07-25 08:16:56 +08:00
|
|
|
std::unique_ptr<RemarkParser> TheParser;
|
2019-07-16 23:25:05 +08:00
|
|
|
Optional<std::string> Err;
|
|
|
|
|
|
|
|
CParser(Format ParserFormat, StringRef Buf,
|
2019-07-24 06:50:08 +08:00
|
|
|
Optional<ParsedStringTable> StrTab = None)
|
|
|
|
: TheParser(cantFail(
|
|
|
|
StrTab ? createRemarkParser(ParserFormat, Buf, std::move(*StrTab))
|
|
|
|
: createRemarkParser(ParserFormat, Buf))) {}
|
2019-07-16 23:25:05 +08:00
|
|
|
|
|
|
|
void handleError(Error E) { Err.emplace(toString(std::move(E))); }
|
|
|
|
bool hasError() const { return Err.hasValue(); }
|
|
|
|
const char *getMessage() const { return Err ? Err->c_str() : nullptr; };
|
|
|
|
};
|
2019-08-24 03:59:23 +08:00
|
|
|
} // namespace
|
2019-07-16 23:25:05 +08:00
|
|
|
|
2019-03-20 02:21:43 +08:00
|
|
|
// Create wrappers for C Binding types (see CBindingWrapping.h).
|
2019-07-16 23:25:05 +08:00
|
|
|
// Provides the wrap()/unwrap() helpers used by the C API functions in this
// file to convert between CParser pointers and LLVMRemarkParserRef handles.
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef)
|
2018-10-11 02:43:42 +08:00
|
|
|
|
2019-03-20 05:11:07 +08:00
|
|
|
/// C API: create a YAML remark parser over the \p Size bytes starting at
/// \p Buf. The returned handle owns a heap-allocated CParser and must be
/// released with LLVMRemarkParserDispose().
extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
                                                          uint64_t Size) {
  const StringRef Input(static_cast<const char *>(Buf), Size);
  return wrap(new CParser(Format::YAML, Input));
}
|
|
|
|
|
|
|
|
/// C API: create a bitstream remark parser over the \p Size bytes starting at
/// \p Buf. The returned handle owns a heap-allocated CParser and must be
/// released with LLVMRemarkParserDispose().
extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateBitstream(const void *Buf,
                                                               uint64_t Size) {
  const StringRef Input(static_cast<const char *>(Buf), Size);
  return wrap(new CParser(Format::Bitstream, Input));
}
|
2018-10-11 02:43:42 +08:00
|
|
|
|
2019-03-20 05:11:07 +08:00
|
|
|
/// C API: parse and return the next remark.
///
/// \returns the next remark, whose ownership is released to the caller, or
/// nullptr when the end of the input was reached or parsing failed. A failure
/// other than end-of-file is recorded on the parser and can be inspected with
/// LLVMRemarkParserHasError() / LLVMRemarkParserGetErrorMessage().
extern "C" LLVMRemarkEntryRef
LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) {
  CParser *State = unwrap(Parser);
  Expected<std::unique_ptr<Remark>> Next = State->TheParser->next();

  // Valid remark: release it from the unique_ptr and hand it over.
  if (Next)
    return wrap(Next->release());

  Error Failure = Next.takeError();

  // Reaching the end of the input is not reported as an error.
  if (Failure.isA<EndOfFileError>()) {
    consumeError(std::move(Failure));
    return nullptr;
  }

  // Anything else is stored so that it can be checked through HasError and
  // GetErrorMessage.
  State->handleError(std::move(Failure));
  return nullptr;
}
|
|
|
|
|
2019-03-06 04:45:17 +08:00
|
|
|
/// C API: report whether an error has been recorded on \p Parser.
extern "C" LLVMBool LLVMRemarkParserHasError(LLVMRemarkParserRef Parser) {
  const CParser *State = unwrap(Parser);
  return State->hasError();
}
|
|
|
|
|
|
|
|
extern "C" const char *
|
2019-03-06 04:45:17 +08:00
|
|
|
LLVMRemarkParserGetErrorMessage(LLVMRemarkParserRef Parser) {
|
2019-07-16 23:25:05 +08:00
|
|
|
return unwrap(Parser)->getMessage();
|
2018-10-11 02:43:42 +08:00
|
|
|
}
|
|
|
|
|
2019-03-06 04:45:17 +08:00
|
|
|
/// C API: destroy the CParser behind \p Parser, which was allocated by one of
/// the LLVMRemarkParserCreate* functions.
extern "C" void LLVMRemarkParserDispose(LLVMRemarkParserRef Parser) {
  CParser *Owned = unwrap(Parser);
  delete Owned;
}
|