OpenTelemetry API Tracing. (#6478)

* OTEL Span Implementation.

* Add trace logging, refactor constructors, unit tests.

* Unit tests for creating OTELSpans

* refactor flag names

* Additional comments.

* Formatting.

* Add back Arena.h include

* cleanup header includes

* Remove include cstddef.

* Remove memory include.

* Remove trailing commas on enums.

* Enum formatting.

* Changing SpanStatus enum from ERROR to ERR to see if it is clashing with Windows.h.

* Move OTELEvents to SmallVectorRef<KeyValueRef>.

* Clean up unused includes.

* Unit tests

* Const reference arguments for OTEL constructors and additional addAttribute
unit tests. Adding return of OTELSpan reference on addAttribute.

* Formatting.

* Begin messagepack encoding tests.

* Formatting.

* MessagePack encoding unit tests.

* Formatting.

* Remove swapBinary.

* remove ambiguous helper methods

* Formatting fixes

* Fix ambiguous calls in AddEvents unit tests.

* Include AddAttributes unit test.

* descope windows for UDP encoding test

* Move ifndef WIN32 around MPEncoding unit test.

* Fix AddEvents Attributes size assertion.

* Formatting.

* Enable AddLinks unit test.

* Full MP encoding testing.

* Fix for encoding longer strings with MessagePack and unit test.

* Remove unnecessary header includes and serialize_string_ref function.

* Fix typos

* Update flow/Tracing.actor.cpp

Co-authored-by: Lukas Joswiak <lukas.joswiak@snowflake.com>

* Update flow/Tracing.actor.cpp

Co-authored-by: Lukas Joswiak <lukas.joswiak@snowflake.com>

* Use ASSERT_WE_THINK and add logging.

We don't want people creating incredibly large traces, so we are only
supporting a subset of MessagePack collection and string sizes. Assert
and log when we hit these unsupported sizes.

* Remove TODOs no longer applicable.

* Refactor OTELEvent to OTELEventRef.

* Remove unnecessary public declaration in struct.

* fix OTELEventRef attribute size assertion

* Formatting

Co-authored-by: Lukas Joswiak <lukas.joswiak@snowflake.com>
This commit is contained in:
Ray Jenkins 2022-04-04 19:55:38 -05:00 committed by GitHub
parent 5a336655f1
commit bb9b9d2471
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 718 additions and 16 deletions

View File

@ -30,6 +30,25 @@
#include "flow/Arena.h"
#include "flow/flow.h"
// Bit field carried by a SpanContext. Only the lowest bit is defined here: it is set when the trace is sampled.
enum class TraceFlags : uint8_t {
	unsampled = 0b00000000,
	sampled = 0b00000001
};
// Bitwise AND of two TraceFlags values, computed on the underlying integer type.
inline TraceFlags operator&(TraceFlags lhs, TraceFlags rhs) {
	using Bits = std::underlying_type_t<TraceFlags>;
	const Bits left = static_cast<Bits>(lhs);
	const Bits right = static_cast<Bits>(rhs);
	return static_cast<TraceFlags>(left & right);
}
struct SpanContext {
UID traceID;
uint64_t spanID;
TraceFlags m_Flags;
SpanContext() : traceID(UID()), spanID(0), m_Flags(TraceFlags::unsampled) {}
SpanContext(UID traceID, uint64_t spanID, TraceFlags flags) : traceID(traceID), spanID(spanID), m_Flags(flags) {}
SpanContext(UID traceID, uint64_t spanID) : traceID(traceID), spanID(spanID), m_Flags(TraceFlags::unsampled) {}
SpanContext(Arena arena, const SpanContext& span)
: traceID(span.traceID), spanID(span.spanID), m_Flags(span.m_Flags) {}
bool isSampled() const { return (m_Flags & TraceFlags::sampled) == TraceFlags::sampled; }
};
// Integer aliases: Version is signed 64-bit; LogEpoch and Sequence are unsigned 64-bit.
using Version = int64_t;
using LogEpoch = uint64_t;
using Sequence = uint64_t;

View File

@ -19,10 +19,9 @@
*/
#include "flow/Tracing.h"
#include "flow/UnitTest.h"
#include "flow/Knobs.h"
#include "flow/network.h"
#include <functional>
#include <iomanip>
#include <memory>
@ -43,6 +42,7 @@ constexpr float kQueueSizeLogInterval = 5.0;
// Tracer that reports itself as DISABLED and discards every span handed to it.
struct NoopTracer : ITracer {
	TracerType type() const override {
		return TracerType::DISABLED;
	}

	// Both span flavours are intentionally ignored.
	void trace(Span const&) override {}
	void trace(OTELSpan const&) override {}
};
struct LogfileTracer : ITracer {
@ -63,6 +63,35 @@ struct LogfileTracer : ITracer {
TraceEvent(SevInfo, "TracingSpanTag", span.context).detail("Key", key).detail("Value", value);
}
}
// Logs an OTELSpan as trace events, all keyed by the span's trace ID:
//   - one "TracingSpan" event describing the span itself,
//   - one "TracingSpanLink" event per linked SpanContext,
//   - one "TracingSpanTag" event per span attribute,
//   - one "TracingSpanEvent" event per span event, each followed by one "TracingSpanEventAttribute"
//     event per attribute of that span event.
void trace(OTELSpan const& span) override {
	const auto& traceID = span.context.traceID;

	// Kept as a named local (not a temporary), so the object lives until this function returns.
	TraceEvent spanEvent(SevInfo, "TracingSpan", traceID);
	spanEvent.detail("SpanID", span.context.spanID);
	spanEvent.detail("Location", span.location.name);
	spanEvent.detail("Begin", format("%.6f", span.begin));
	spanEvent.detail("End", format("%.6f", span.end));
	spanEvent.detail("Kind", span.kind);
	spanEvent.detail("Status", span.status);
	spanEvent.detail("ParentSpanID", span.parentContext.spanID);

	for (const auto& linked : span.links) {
		TraceEvent linkEvent(SevInfo, "TracingSpanLink", traceID);
		linkEvent.detail("TraceID", linked.traceID).detail("SpanID", linked.spanID);
	}

	for (const auto& attribute : span.attributes) {
		TraceEvent(SevInfo, "TracingSpanTag", traceID).detail("Key", attribute.key).detail("Value", attribute.value);
	}

	for (const auto& spanEv : span.events) {
		// Temporary on purpose: it is complete before the per-attribute events below are created.
		TraceEvent(SevInfo, "TracingSpanEvent", traceID).detail("Name", spanEv.name).detail("Time", spanEv.time);
		for (const auto& attribute : spanEv.attributes) {
			TraceEvent(SevInfo, "TracingSpanEventAttribute", traceID)
			    .detail("Key", attribute.key)
			    .detail("Value", attribute.value);
		}
	}
}
};
struct TraceRequest {
@ -151,7 +180,6 @@ ACTOR Future<Void> traceLog(int* pendingMessages, bool* sendError) {
*/
struct UDPTracer : public ITracer {
protected:
// Serializes span fields as an array into the supplied TraceRequest
// buffer.
void serialize_span(const Span& span, TraceRequest& request) {
@ -179,6 +207,32 @@ protected:
serialize_vector(span.parents, request);
}
void serialize_span(const OTELSpan& span, TraceRequest& request) {
uint16_t size = 14;
request.write_byte(size | 0b10010000); // write as array
serialize_value(span.context.traceID.first(), request, 0xcf); // trace id
serialize_value(span.context.traceID.second(), request, 0xcf); // trace id
serialize_value(span.context.spanID, request, 0xcf); // spanid
// parent value
serialize_value(span.parentContext.traceID.first(), request, 0xcf); // trace id
serialize_value(span.parentContext.traceID.second(), request, 0xcf); // trace id
serialize_value(span.parentContext.spanID, request, 0xcf); // spanId
// Payload
serialize_string(span.location.name.toString(), request);
serialize_value(span.begin, request, 0xcb); // start time
serialize_value(span.end, request, 0xcb); // end
// Kind
serialize_value(span.kind, request, 0xcc);
// Status
serialize_value(span.status, request, 0xcc);
// Links
serialize_vector(span.links, request);
// Events
serialize_vector(span.events, request);
// Attributes
serialize_map(span.attributes, request);
}
private:
// Writes the given value in big-endian format to the request. Sets the
// first byte to msgpack_type.
@ -205,10 +259,12 @@ private:
request.write_byte(static_cast<uint8_t>(length));
} else if (length <= 65535) {
request.write_byte(0xda);
request.write_byte(static_cast<uint16_t>(length));
request.write_byte(reinterpret_cast<const uint8_t*>(&length)[1]);
request.write_byte(reinterpret_cast<const uint8_t*>(&length)[0]);
} else {
// TODO: Add support for longer strings if necessary.
ASSERT(false);
TraceEvent(SevWarn, "TracingSpanSerializeString")
.detail("Failed to MessagePack encode very large string", length);
ASSERT_WE_THINK(false);
}
request.write_bytes(c, length);
@ -225,7 +281,6 @@ private:
if (size == 0) {
return;
}
if (size <= 15) {
request.write_byte(static_cast<uint8_t>(size) | 0b10010000);
} else if (size <= 65535) {
@ -233,8 +288,9 @@ private:
request.write_byte(reinterpret_cast<const uint8_t*>(&size)[1]);
request.write_byte(reinterpret_cast<const uint8_t*>(&size)[0]);
} else {
// TODO: Add support for longer vectors if necessary.
ASSERT(false);
TraceEvent(SevWarn, "TracingSpanSerializeVector")
.detail("Failed to MessagePack encode very large vector", size);
ASSERT_WE_THINK(false);
}
for (const auto& parentContext : vec) {
@ -242,14 +298,76 @@ private:
}
}
inline void serialize_map(const std::unordered_map<StringRef, StringRef>& map, TraceRequest& request) {
// Writes the linked SpanContexts in `vec` to the request: a MessagePack array header (fixarray for up to 15
// entries, array16 for up to 65535) followed, per link, by the two trace ID halves and the span ID as uint64
// values. An empty vector still produces a zero-length fixarray header. For more than 65535 entries a warning
// is traced and ASSERT_WE_THINK(false) is hit; no array header is written in that case.
inline void serialize_vector(const SmallVectorRef<SpanContext>& vec, TraceRequest& request) {
	int count = vec.size();
	if (count > 65535) {
		TraceEvent(SevWarn, "TracingSpanSerializeVector").detail("Failed to MessagePack encode large vector", count);
		ASSERT_WE_THINK(false);
	} else if (count > 15) {
		// array16: type byte followed by the two low-order bytes of the count, most significant first
		// (read from host memory, as before).
		const uint8_t* countBytes = reinterpret_cast<const uint8_t*>(&count);
		request.write_byte(0xdc);
		request.write_byte(countBytes[1]);
		request.write_byte(countBytes[0]);
	} else {
		request.write_byte(static_cast<uint8_t>(count) | 0b10010000);
	}

	for (const auto& linked : vec) {
		serialize_value(linked.traceID.first(), request, 0xcf); // trace id, first half
		serialize_value(linked.traceID.second(), request, 0xcf); // trace id, second half
		serialize_value(linked.spanID, request, 0xcf); // span id
	}
}
// Writes the span events in `vec` to the request: a MessagePack array header (fixarray for up to 15 entries,
// array16 for up to 65535) followed, per event, by its name (string), its time (float64) and its attributes.
// An empty vector still produces a zero-length fixarray header. For more than 65535 entries a warning is
// traced and ASSERT_WE_THINK(false) is hit; no array header is written in that case.
inline void serialize_vector(const SmallVectorRef<OTELEventRef>& vec, TraceRequest& request) {
	int count = vec.size();
	if (count > 65535) {
		TraceEvent(SevWarn, "TracingSpanSerializeVector").detail("Failed to MessagePack encode large vector", count);
		ASSERT_WE_THINK(false);
	} else if (count > 15) {
		// array16: type byte followed by the two low-order bytes of the count, most significant first
		// (read from host memory, as before).
		const uint8_t* countBytes = reinterpret_cast<const uint8_t*>(&count);
		request.write_byte(0xdc);
		request.write_byte(countBytes[1]);
		request.write_byte(countBytes[0]);
	} else {
		request.write_byte(static_cast<uint8_t>(count) | 0b10010000);
	}

	for (const auto& spanEv : vec) {
		serialize_string(spanEv.name.toString(), request); // event name
		serialize_value(spanEv.time, request, 0xcb); // event time
		serialize_vector(spanEv.attributes, request); // event attributes
	}
}
// Writes a vector of key/value pairs to the request. Despite the name, the header emitted is a MessagePack
// fixmap (1000xxxx, 0x80 - 0x8f), followed by each key and value as strings. Only up to 15 pairs are
// supported; for more, a warning is traced and ASSERT_WE_THINK(false) is hit without writing a header.
inline void serialize_vector(const SmallVectorRef<KeyValueRef>& vals, TraceRequest& request) {
	const int pairCount = vals.size();
	if (pairCount > 15) {
		TraceEvent(SevWarn, "TracingSpanSerializeVector")
		    .detail("Failed to MessagePack encode large vector", pairCount);
		ASSERT_WE_THINK(false);
	} else {
		request.write_byte(static_cast<uint8_t>(pairCount) | 0b10000000);
	}

	for (const auto& pair : vals) {
		serialize_string(pair.key.toString(), request);
		serialize_string(pair.value.toString(), request);
	}
}
template <class Map>
inline void serialize_map(const Map& map, TraceRequest& request) {
int size = map.size();
if (size <= 15) {
request.write_byte(static_cast<uint8_t>(size) | 0b10000000);
} else {
// TODO: Add support for longer maps if necessary.
ASSERT(false);
TraceEvent(SevWarn, "TracingSpanSerializeMap").detail("Failed to MessagePack encode large map", size);
ASSERT_WE_THINK(false);
}
for (const auto& [key, value] : map) {
@ -291,7 +409,7 @@ struct FastUDPTracer : public UDPTracer {
TracerType type() const override { return TracerType::NETWORK_LOSSY; }
void trace(Span const& span) override {
void prepare(int size) {
static std::once_flag once;
std::call_once(once, [&]() {
log_actor_ = fastTraceLogger(&unready_socket_messages_, &failed_messages_, &total_messages_, &send_error_);
@ -307,7 +425,7 @@ struct FastUDPTracer : public UDPTracer {
socket_ = INetworkConnections::net()->createUDPSocket(destAddress);
});
if (span.location.name.size() == 0) {
if (size == 0) {
return;
}
@ -322,9 +440,9 @@ struct FastUDPTracer : public UDPTracer {
if (send_error_) {
return;
}
}
serialize_span(span, request_);
void write() {
int bytesSent = send(socket_fd_, request_.buffer.get(), request_.data_size, MSG_DONTWAIT);
if (bytesSent == -1) {
// Will forgo checking errno here, and assume all error messages
@ -335,6 +453,18 @@ struct FastUDPTracer : public UDPTracer {
request_.reset();
}
// Sends an OTELSpan over the UDP socket: prepare() performs setup, the span is serialized into request_, and
// write() transmits the buffer.
//
// prepare() returns void, so its internal early returns (empty location name, earlier send error) cannot stop
// this function by themselves. Those two conditions are therefore re-checked here so that such spans are
// skipped instead of being serialized and sent anyway.
// NOTE(review): prepare() may have further early exits that are not visible from here; having prepare()
// return a bool would be the more complete fix.
void trace(OTELSpan const& span) override {
	const int nameSize = span.location.name.size();
	prepare(nameSize);
	if (nameSize == 0 || send_error_) {
		return;
	}
	serialize_span(span, request_);
	write();
}
// Sends a Span over the UDP socket: prepare() performs setup, the span is serialized into request_, and
// write() transmits the buffer.
//
// prepare() returns void, so its internal early returns (empty location name, earlier send error) cannot stop
// this function by themselves. Those two conditions are therefore re-checked here so that such spans are
// skipped instead of being serialized and sent anyway.
// NOTE(review): prepare() may have further early exits that are not visible from here; having prepare()
// return a bool would be the more complete fix.
void trace(Span const& span) override {
	const int nameSize = span.location.name.size();
	prepare(nameSize);
	if (nameSize == 0 || send_error_) {
		return;
	}
	serialize_span(span, request_);
	write();
}
private:
TraceRequest request_;
@ -403,3 +533,352 @@ Span::~Span() {
g_tracer->trace(*this);
}
}
// Move assignment. If this span has been started and is sampled, it is finished (end time set) and handed to
// the tracer before its contents are replaced by those of `o`. Afterwards `o` is reset to an unstarted state
// (begin == 0.0), so destroying it does not trace anything.
OTELSpan& OTELSpan::operator=(OTELSpan&& o) {
	// The span being reported is *this, so the sampling decision must come from this span's own context
	// (same condition as the destructor), not from the span being moved in.
	if (begin > 0.0 && context.isSampled()) {
		end = g_network->now();
		g_tracer->trace(*this);
	}
	arena = std::move(o.arena);
	context = o.context;
	parentContext = o.parentContext;
	begin = o.begin;
	end = o.end;
	location = o.location;
	links = std::move(o.links);
	events = std::move(o.events);
	// Attributes are allocated in the arena as well; they must travel with it. Otherwise this span would keep
	// referring to memory of the arena that was just replaced and the incoming attributes would be lost.
	attributes = std::move(o.attributes);
	status = o.status;
	kind = o.kind;
	o.context = SpanContext();
	o.parentContext = SpanContext();
	o.kind = SpanKind::INTERNAL;
	o.begin = 0.0;
	o.end = 0.0;
	o.status = SpanStatus::UNSET;
	return *this;
}
// On destruction, a span that was started (begin > 0.0) and is sampled gets its end time stamped and is
// passed to the global tracer. Any other span is dropped silently.
OTELSpan::~OTELSpan() {
	const bool started = begin > 0.0;
	if (!started || !context.isSampled()) {
		return;
	}
	end = g_network->now();
	g_tracer->trace(*this);
}
// Checks how the sampling flag and trace ID of a new OTELSpan are derived from the sample rate and the parent.
TEST_CASE("/flow/Tracing/CreateOTELSpan") {
	// Rate provider used below to request sampling.
	const auto alwaysSample = []() { return 1.0; };

	// Sampling disabled, no parent.
	OTELSpan notSampled("foo"_loc);
	ASSERT(!notSampled.context.isSampled());

	// Force sampling.
	OTELSpan sampled("foo"_loc, alwaysSample);
	ASSERT(sampled.context.isSampled());

	// With a sampled parent, the child must carry the parent's trace ID.
	OTELSpan childTraceIDMatchesParent("foo"_loc, alwaysSample, SpanContext(UID(100, 101), 200, TraceFlags::sampled));
	const UID& childTraceID = childTraceIDMatchesParent.context.traceID;
	const UID& parentTraceID = childTraceIDMatchesParent.parentContext.traceID;
	ASSERT(childTraceID.first() == parentTraceID.first());
	ASSERT(childTraceID.second() == parentTraceID.second());

	// A real (non-zero) parent that is not sampled suppresses sampling of the child, even when the child
	// itself was selected for sampling.
	OTELSpan parentNotSampled("foo"_loc, alwaysSample, SpanContext(UID(1, 1), 1, TraceFlags::unsampled));
	ASSERT(!parentNotSampled.context.isSampled());

	// An unsampled parent with all-zero trace and span IDs means there is no actual parent: the child is the
	// root of a new trace and keeps its own sampling decision.
	OTELSpan noParent("foo"_loc, alwaysSample, SpanContext(UID(0, 0), 0, TraceFlags::unsampled));
	ASSERT(noParent.context.isSampled());

	return Void();
};
// Exercises OTELSpan::addEvent with attributes, without attributes, and chained.
TEST_CASE("/flow/Tracing/AddEvents") {
	// Event with a single attribute.
	OTELSpan span1("span_with_event"_loc);
	Arena arena = span1.arena;
	SmallVectorRef<KeyValueRef> attrs;
	attrs.push_back(arena, KeyValueRef("foo"_sr, "bar"_sr));
	span1.addEvent(LiteralStringRef("read_version"), 1.0, attrs);
	{
		const auto& recorded = span1.events[0];
		ASSERT(recorded.name.toString() == "read_version");
		ASSERT(recorded.time == 1.0);
		ASSERT(recorded.attributes.begin()->key.toString() == "foo");
		ASSERT(recorded.attributes.begin()->value.toString() == "bar");
	}

	// Event without attributes.
	OTELSpan span2("span_with_event"_loc);
	span2.addEvent(StringRef(span2.arena, LiteralStringRef("commit_succeed")), 1234567.100);
	{
		const auto& recorded = span2.events[0];
		ASSERT(recorded.name.toString() == "commit_succeed");
		ASSERT(recorded.time == 1234567.100);
		ASSERT(recorded.attributes.size() == 0);
	}

	// Two chained addEvent calls sharing the same attribute vector.
	OTELSpan span3("span_with_event"_loc);
	Arena s3Arena = span3.arena;
	SmallVectorRef<KeyValueRef> s3Attrs;
	s3Attrs.push_back(s3Arena, KeyValueRef("xyz"_sr, "123"_sr));
	span3.addEvent("commit_fail"_sr, 1234567.100, s3Attrs).addEvent("commit_succeed"_sr, 1111.001, s3Attrs);
	{
		const auto& failed = span3.events[0];
		ASSERT(failed.name.toString() == "commit_fail");
		ASSERT(failed.time == 1234567.100);
		ASSERT(failed.attributes.size() == 1);
		ASSERT(failed.attributes.begin()->key.toString() == "xyz");
		ASSERT(failed.attributes.begin()->value.toString() == "123");

		const auto& succeeded = span3.events[1];
		ASSERT(succeeded.name.toString() == "commit_succeed");
		ASSERT(succeeded.time == 1111.001);
		ASSERT(succeeded.attributes.size() == 1);
		ASSERT(succeeded.attributes.begin()->key.toString() == "xyz");
		ASSERT(succeeded.attributes.begin()->value.toString() == "123");
	}
	return Void();
};
// Exercises OTELSpan::addAttribute, both as separate calls and chained. Index 0 of the attribute list is the
// "address" attribute that the OTELSpan constructor adds, so user attributes start at index 1.
TEST_CASE("/flow/Tracing/AddAttributes") {
	// Separate calls.
	OTELSpan span1("span_with_attrs"_loc);
	Arena arena = span1.arena;
	span1.addAttribute(StringRef(arena, LiteralStringRef("foo")), StringRef(arena, LiteralStringRef("bar")));
	span1.addAttribute(StringRef(arena, LiteralStringRef("operation")), StringRef(arena, LiteralStringRef("grv")));
	ASSERT_EQ(span1.attributes.size(), 3); // Includes default attribute of "address"
	const auto& attrs1 = span1.attributes;
	ASSERT(attrs1[1] == KeyValueRef("foo"_sr, "bar"_sr));
	ASSERT(attrs1[2] == KeyValueRef("operation"_sr, "grv"_sr));

	// Chained calls.
	OTELSpan span3("span_with_attrs"_loc);
	Arena s3Arena = span3.arena;
	span3.addAttribute(StringRef(s3Arena, LiteralStringRef("a")), StringRef(s3Arena, LiteralStringRef("1")))
	    .addAttribute(StringRef(s3Arena, LiteralStringRef("b")), LiteralStringRef("2"))
	    .addAttribute(StringRef(s3Arena, LiteralStringRef("c")), LiteralStringRef("3"));
	ASSERT_EQ(span3.attributes.size(), 4); // Includes default attribute of "address"
	const auto& attrs3 = span3.attributes;
	ASSERT(attrs3[1] == KeyValueRef("a"_sr, "1"_sr));
	ASSERT(attrs3[2] == KeyValueRef("b"_sr, "2"_sr));
	ASSERT(attrs3[3] == KeyValueRef("c"_sr, "3"_sr));
	return Void();
};
// Exercises OTELSpan::addLink (single and chained) and OTELSpan::addLinks (initializer lists, chained).
TEST_CASE("/flow/Tracing/AddLinks") {
	// addLink: one plain call followed by two chained calls.
	OTELSpan span1("span_with_links"_loc);
	span1.addLink(SpanContext(UID(100, 101), 200, TraceFlags::sampled));
	span1.addLink(SpanContext(UID(200, 201), 300, TraceFlags::unsampled))
	    .addLink(SpanContext(UID(300, 301), 400, TraceFlags::sampled));
	{
		const auto& first = span1.links[0];
		ASSERT(first.traceID == UID(100, 101));
		ASSERT(first.spanID == 200);
		ASSERT(first.m_Flags == TraceFlags::sampled);

		const auto& second = span1.links[1];
		ASSERT(second.traceID == UID(200, 201));
		ASSERT(second.spanID == 300);
		ASSERT(second.m_Flags == TraceFlags::unsampled);

		const auto& third = span1.links[2];
		ASSERT(third.traceID == UID(300, 301));
		ASSERT(third.spanID == 400);
		ASSERT(third.m_Flags == TraceFlags::sampled);
	}

	// addLinks: two links in one list, then a chained call with a single link.
	OTELSpan span2("span_with_links"_loc);
	SpanContext link1(UID(1, 1), 1, TraceFlags::sampled);
	SpanContext link2(UID(2, 2), 2, TraceFlags::sampled);
	SpanContext link3(UID(3, 3), 3, TraceFlags::sampled);
	span2.addLinks({ link1, link2 }).addLinks({ link3 });
	{
		const auto& first = span2.links[0];
		ASSERT(first.traceID == UID(1, 1));
		ASSERT(first.spanID == 1);
		ASSERT(first.m_Flags == TraceFlags::sampled);

		const auto& second = span2.links[1];
		ASSERT(second.traceID == UID(2, 2));
		ASSERT(second.spanID == 2);
		ASSERT(second.m_Flags == TraceFlags::sampled);

		const auto& third = span2.links[2];
		ASSERT(third.traceID == UID(3, 3));
		ASSERT(third.spanID == 3);
		ASSERT(third.m_Flags == TraceFlags::sampled);
	}
	return Void();
};
// Reads two bytes starting at `index`, interprets them as a big-endian 16-bit value via fromBigEndian16, and
// returns the result widened to uint64_t.
uint64_t swapUint16BE(uint8_t* index) {
	uint16_t raw = 0;
	memcpy(&raw, index, sizeof(raw));
	return fromBigEndian16(raw);
}
// Reads eight bytes starting at `index`, interprets them as a big-endian 64-bit value via fromBigEndian64, and
// returns the result.
uint64_t swapUint64BE(uint8_t* index) {
	uint64_t raw = 0;
	memcpy(&raw, index, sizeof(raw));
	return fromBigEndian64(raw);
}
// Reads sizeof(double) bytes starting at `index` and returns the double obtained by reversing their byte
// order. The reversal is unconditional (it does not look at the host's endianness).
double swapDoubleBE(uint8_t* index) {
	uint8_t reversed[sizeof(double)];
	for (size_t i = 0; i < sizeof(double); ++i) {
		reversed[i] = index[sizeof(double) - 1 - i];
	}
	double value;
	memcpy(&value, reversed, sizeof(value));
	return value;
}
// Returns the `len` bytes starting at `index` as a std::string. A non-positive `len` yields an empty string.
//
// The string is built directly from the pointer/length pair. The earlier version copied the bytes into a
// variable-length stack array (not standard C++, and this helper is compiled outside the WIN32 guard) and then
// built the string from a C string, which also cut the result short at an embedded NUL byte.
std::string readMPString(uint8_t* index, int len) {
	if (len <= 0) {
		return std::string();
	}
	return std::string(reinterpret_cast<const char*>(index), static_cast<size_t>(len));
}
// Windows doesn't like lack of header and declaration of constructor for FastUDPTracer
#ifndef WIN32
// Serializes several OTELSpans with FastUDPTracer::serialize_span and checks the produced MessagePack bytes
// at fixed offsets: the 14-entry array header, the IDs, name, timestamps, kind, status, links, events and
// attributes, plus the str16 encoding used for a long location name.
TEST_CASE("/flow/Tracing/FastUDPMessagePackEncoding") {
	OTELSpan span1("encoded_span"_loc);
	auto request = TraceRequest{ .buffer = std::make_unique<uint8_t[]>(kTraceBufferSize),
		                         .data_size = 0,
		                         .buffer_size = kTraceBufferSize };
	auto tracer = FastUDPTracer();
	tracer.serialize_span(span1, request);
	auto data = request.buffer.get();
	ASSERT(data[0] == 0b10011110); // Default array size.
	request.reset();

	// Test - constructor OTELSpan(const Location& location, const SpanContext parent, const SpanContext& link)
	// Will delegate to other constructors.
	OTELSpan span2("encoded_span"_loc,
	               SpanContext(UID(100, 101), 1, TraceFlags::sampled),
	               SpanContext(UID(200, 201), 2, TraceFlags::sampled));
	tracer.serialize_span(span2, request);
	data = request.buffer.get();
	ASSERT(data[0] == 0b10011110); // 14 element array.
	// Verify the Parent Trace ID overwrites this spans Trace ID
	ASSERT(data[1] == 0xcf);
	ASSERT(swapUint64BE(&data[2]) == 100);
	ASSERT(data[10] == 0xcf);
	ASSERT(swapUint64BE(&data[11]) == 101);
	ASSERT(data[19] == 0xcf);
	// We don't care about the next 8 bytes, they are the ID for the span itself and will be random.
	// Parent TraceID and Parent SpanID.
	ASSERT(data[28] == 0xcf);
	ASSERT(swapUint64BE(&data[29]) == 100);
	ASSERT(data[37] == 0xcf);
	ASSERT(swapUint64BE(&data[38]) == 101);
	ASSERT(data[46] == 0xcf);
	ASSERT(swapUint64BE(&data[47]) == 1);
	// Read and verify span name
	ASSERT(data[55] == (0b10100000 | strlen("encoded_span")));
	ASSERT(strncmp(readMPString(&data[56], strlen("encoded_span")).c_str(), "encoded_span", strlen("encoded_span")) ==
	       0);
	// Verify begin/end is encoded, we don't care about the values
	ASSERT(data[68] == 0xcb);
	ASSERT(data[77] == 0xcb);
	// SpanKind
	ASSERT(data[86] == 0xcc);
	ASSERT(data[87] == static_cast<uint8_t>(SpanKind::SERVER));
	// Status
	ASSERT(data[88] == 0xcc);
	ASSERT(data[89] == static_cast<uint8_t>(SpanStatus::OK));
	// Linked SpanContext
	ASSERT(data[90] == 0b10010001);
	ASSERT(data[91] == 0xcf);
	ASSERT(swapUint64BE(&data[92]) == 200);
	ASSERT(data[100] == 0xcf);
	ASSERT(swapUint64BE(&data[101]) == 201);
	ASSERT(data[109] == 0xcf);
	ASSERT(swapUint64BE(&data[110]) == 2);
	// Events
	ASSERT(data[118] == 0b10010000); // empty
	// Attributes
	ASSERT(data[119] == 0b10000001); // single k/v pair
	ASSERT(data[120] == 0b10100111); // length of key string "address" == 7

	request.reset();

	// Exercise all fluent interfaces, include links, events, and attributes.
	OTELSpan span3("encoded_span_3"_loc);
	auto s3Arena = span3.arena;
	SmallVectorRef<KeyValueRef> attrs;
	attrs.push_back(s3Arena, KeyValueRef("foo"_sr, "bar"_sr));
	span3.addAttribute("operation"_sr, "grv"_sr)
	    .addLink(SpanContext(UID(300, 301), 400, TraceFlags::sampled))
	    .addEvent(StringRef(s3Arena, LiteralStringRef("event1")), 100.101, attrs);
	tracer.serialize_span(span3, request);
	data = request.buffer.get();
	ASSERT(data[0] == 0b10011110); // 14 element array.
	// We don't care about the next 54 bytes as there is no parent and a randomly assigned Trace and SpanID
	// Read and verify span name
	ASSERT(data[55] == (0b10100000 | strlen("encoded_span_3")));
	ASSERT(strncmp(readMPString(&data[56], strlen("encoded_span_3")).c_str(),
	               "encoded_span_3",
	               strlen("encoded_span_3")) == 0);
	// Verify begin/end is encoded, we don't care about the values
	ASSERT(data[70] == 0xcb);
	ASSERT(data[79] == 0xcb);
	// SpanKind
	ASSERT(data[88] == 0xcc);
	ASSERT(data[89] == static_cast<uint8_t>(SpanKind::SERVER));
	// Status
	ASSERT(data[90] == 0xcc);
	ASSERT(data[91] == static_cast<uint8_t>(SpanStatus::OK));
	// Linked SpanContext
	ASSERT(data[92] == 0b10010001);
	ASSERT(data[93] == 0xcf);
	ASSERT(swapUint64BE(&data[94]) == 300);
	ASSERT(data[102] == 0xcf);
	ASSERT(swapUint64BE(&data[103]) == 301);
	ASSERT(data[111] == 0xcf);
	ASSERT(swapUint64BE(&data[112]) == 400);
	// Events
	ASSERT(data[120] == 0b10010001); // one event
	ASSERT(data[121] == (0b10100000 | strlen("event1")));
	ASSERT(strncmp(readMPString(&data[122], strlen("event1")).c_str(), "event1", strlen("event1")) == 0);
	ASSERT(data[128] == 0xcb);
	ASSERT(swapDoubleBE(&data[129]) == 100.101);
	// Events Attributes
	ASSERT(data[137] == 0b10000001); // single k/v pair
	ASSERT(data[138] == 0b10100011); // length of key string "foo" == 3
	ASSERT(strncmp(readMPString(&data[139], strlen("foo")).c_str(), "foo", strlen("foo")) == 0);
	ASSERT(data[142] == 0b10100011); // length of value string "bar" == 3
	ASSERT(strncmp(readMPString(&data[143], strlen("bar")).c_str(), "bar", strlen("bar")) == 0);
	// Attributes
	ASSERT(data[146] == 0b10000010); // two k/v pair
	// Reconstruct map from MessagePack wire format data and verify.
	std::unordered_map<std::string, std::string> attributes;
	auto index = 147;
	// A fixstr header byte is 101xxxxx: the three high-order bits mark a string of at most 31 bytes and the
	// five low-order bits hold its length, so all five bits must be kept when extracting the length.
	// NOTE(review): a string of 32 bytes or more (e.g. a long address value) is not a fixstr and is not
	// handled by this reconstruction.
	auto firstKeyLength = static_cast<uint8_t>(data[index] & 0b00011111);
	index++;
	auto firstKey = readMPString(&data[index], firstKeyLength);
	index += firstKeyLength;
	auto firstValueLength = static_cast<uint8_t>(data[index] & 0b00011111);
	index++;
	auto firstValue = readMPString(&data[index], firstValueLength);
	index += firstValueLength;
	attributes[firstKey] = firstValue;

	auto secondKeyLength = static_cast<uint8_t>(data[index] & 0b00011111);
	index++;
	auto secondKey = readMPString(&data[index], secondKeyLength);
	index += secondKeyLength;
	auto secondValueLength = static_cast<uint8_t>(data[index] & 0b00011111);
	index++;
	auto secondValue = readMPString(&data[index], secondValueLength);
	attributes[secondKey] = secondValue;
	// We don't know what the value for address will be, so just verify it is in the map.
	ASSERT(attributes.find("address") != attributes.end());
	ASSERT(strncmp(attributes["operation"].c_str(), "grv", strlen("grv")) == 0);

	request.reset();

	// Test message pack encoding for string >= 256 && <= 65535 chars
	const char* longString = "yGUtj42gSKfdqib3f0Ri4OVhD7eWyTbKsH/g9+x4UWyXry7NIBFIapPV9f1qdTRl"
	                         "2jXcZI8Ua/Gp8k9EBn7peaEN1uj4w9kf4FQ2Lalu0VrA4oquQoaKYr+wPsLBak9i"
	                         "uyZDF9sX/HW4pVvQhPQdXQWME5E7m58XFMpZ3H8HNXuytWInEuh97SRLlI0RhrvG"
	                         "ixNpYtYlvghsLCrEdZMMGnS2gXgGufIdg1xKJd30fUbZLHcYIC4DTnL5RBpkbQCR"
	                         "SGKKUrpIb/7zePhBDi+gzUzyAcbQ2zUbFWI1KNi3zQk58uUG6wWJZkw+GCs7Cc3V"
	                         "OUxOljwCJkC4QTgdsbbFhxUC+rtoHV5xAqoTQwR0FXnWigUjP7NtdL6huJUr3qRv"
	                         "40c4yUI1a4+P5vJa";
	auto span4 = OTELSpan();
	auto location = Location();
	location.name = StringRef(span4.arena, longString);
	span4.location = location;
	tracer.serialize_span(span4, request);
	data = request.buffer.get();
	ASSERT(data[0] == 0b10011110); // 14 element array.
	// We don't check the next 54 bytes: span4 is default-constructed, so they only hold its own and its
	// parent's (default) trace and span IDs.
	// Read and verify span name
	ASSERT(data[55] == 0xda);
	auto locationLength = swapUint16BE(&data[56]);
	ASSERT(locationLength == strlen(longString));
	ASSERT(strncmp(readMPString(&data[58], locationLength).c_str(), longString, strlen(longString)) == 0);
	return Void();
};
#endif

View File

@ -106,6 +106,196 @@ struct Span {
std::unordered_map<StringRef, StringRef> tags;
};
// OTELSpan
//
// OTELSpan is a tracing implementation which, for the most part, complies with the W3C Trace Context specification
// https://www.w3.org/TR/trace-context/ and the OpenTelemetry API
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md.
//
// The major differences between OTELSpan and the current Span implementation, which is based off the OpenTracing.io
// specification https://opentracing.io/ are as follows.
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#span
//
// OTELSpans have...
// 1. A SpanContext which consists of 3 attributes.
//
// TraceId - A valid trace identifier is a 16-byte array with at least one non-zero byte.
// SpanId - A valid span identifier is an 8-byte array with at least one non-zero byte.
// TraceFlags - 1 byte, bit field for flags.
//
// TraceState is not implemented. Specifically, we do not provide the mutation APIs described at
// https://www.w3.org/TR/trace-context/#mutating-the-tracestate-field, in particular those that delete or update a
// specific, arbitrary key/value pair. This is consistent with the OTEL specification, where SpanContexts are immutable.
// 2. A begin/end and those values are serialized, unlike the Span implementation which has an end but serializes with a
// begin and calculated duration field.
// 3. A SpanKind
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#spankind
// 4. A SpanStatus
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#set-status
// 5. A singular parent SpanContext, which may optionally be null, as opposed to our Span implementation which allows
// for a list of parents.
// 6. "Attributes" rather than "tags". The implementation is essentially the same: a set of string key/value
// pairs, stored here as a SmallVectorRef<KeyValueRef> rather than a map, as a convenience.
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/common/common.md#attributes
// 7. An optional list of linked SpanContexts.
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#specifying-links
// 8. An optional list of timestamped Events.
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#add-events
// Kind of a span; stored in a single byte.
enum class SpanKind : uint8_t {
	INTERNAL = 0,
	CLIENT = 1,
	SERVER = 2,
	PRODUCER = 3,
	CONSUMER = 4
};
// Status of a span; stored in a single byte.
enum class SpanStatus : uint8_t {
	UNSET = 0,
	OK = 1,
	ERR = 2
};
// A timestamped event attached to an OTELSpan: a name, a time, and a list of key/value attributes.
struct OTELEventRef {
	StringRef name;
	double time = 0.0;
	SmallVectorRef<KeyValueRef> attributes;

	// Empty event: default name, time 0.0, no attributes.
	OTELEventRef() {}

	// Event referring to the given name and attributes (members are initialized from the arguments as-is).
	OTELEventRef(const StringRef& name,
	             const double& time,
	             const SmallVectorRef<KeyValueRef>& attributes = SmallVectorRef<KeyValueRef>())
	  : name(name), time(time), attributes(attributes) {}

	// Copy of `other` whose name and attributes are constructed with `arena`.
	OTELEventRef(Arena& arena, const OTELEventRef& other)
	  : name(arena, other.name), time(other.time), attributes(arena, other.attributes) {}
};
class OTELSpan {
public:
OTELSpan(const SpanContext& context,
const Location& location,
const SpanContext& parentContext,
const std::initializer_list<SpanContext>& links = {})
: context(context), location(location), parentContext(parentContext), links(arena, links.begin(), links.end()),
begin(g_network->now()) {
// We've simplified the logic here, essentially we're now always setting trace and span ids and relying on the
// TraceFlags to determine if we're sampling. Therefore if the parent is sampled, we simply overwrite this
// span's traceID with the parent trace id.
if (parentContext.isSampled()) {
this->context.traceID = UID(parentContext.traceID.first(), parentContext.traceID.second());
this->context.m_Flags = TraceFlags::sampled;
} else {
// However there are two other cases.
// 1. A legitamite parent span exists but it was not selected for tracing.
// 2. There is no actual parent, just a default arg parent provided by the constructor AND the "child" span
// was selected for sampling. For case 1. we handle below by marking the child as unsampled. For case 2 we
// needn't do anything, and can rely on the values in this OTELSpan
if (parentContext.traceID.first() != 0 && parentContext.traceID.second() != 0 &&
parentContext.spanID != 0) {
this->context.m_Flags = TraceFlags::unsampled;
}
}
this->kind = SpanKind::SERVER;
this->status = SpanStatus::OK;
this->attributes.push_back(
this->arena, KeyValueRef("address"_sr, StringRef(this->arena, g_network->getLocalAddress().toString())));
}
OTELSpan(const Location& location,
const SpanContext& parent = SpanContext(),
const std::initializer_list<SpanContext>& links = {})
: OTELSpan(
SpanContext(UID(deterministicRandom()->randomUInt64(), deterministicRandom()->randomUInt64()), // traceID
deterministicRandom()->randomUInt64(), // spanID
deterministicRandom()->random01() < FLOW_KNOBS->TRACING_SAMPLE_RATE // sampled or unsampled
? TraceFlags::sampled
: TraceFlags::unsampled),
location,
parent,
links) {}
OTELSpan(const Location& location, const SpanContext parent, const SpanContext& link)
: OTELSpan(location, parent, { link }) {}
// NOTE: This constructor is primarly for unit testing until we sort out how to enable/disable a Knob dynamically in
// a test.
OTELSpan(const Location& location,
const std::function<double()>& rateProvider,
const SpanContext& parent = SpanContext(),
const std::initializer_list<SpanContext>& links = {})
: OTELSpan(SpanContext(UID(deterministicRandom()->randomUInt64(), deterministicRandom()->randomUInt64()),
deterministicRandom()->randomUInt64(),
deterministicRandom()->random01() < rateProvider() ? TraceFlags::sampled
: TraceFlags::unsampled),
location,
parent,
links) {}
OTELSpan(const OTELSpan&) = delete;
OTELSpan(OTELSpan&& o) {
arena = std::move(o.arena);
context = o.context;
location = o.location;
parentContext = std::move(o.parentContext);
kind = o.kind;
begin = o.begin;
end = o.end;
links = std::move(o.links);
events = std::move(o.events);
status = o.status;
o.context = SpanContext();
o.parentContext = SpanContext();
o.kind = SpanKind::INTERNAL;
o.begin = 0.0;
o.end = 0.0;
o.status = SpanStatus::UNSET;
}
OTELSpan() {}
~OTELSpan();
OTELSpan& operator=(OTELSpan&& o);
OTELSpan& operator=(const OTELSpan&) = delete;
void swap(OTELSpan& other) {
std::swap(arena, other.arena);
std::swap(context, other.context);
std::swap(location, other.location);
std::swap(parentContext, other.parentContext);
std::swap(kind, other.kind);
std::swap(status, other.status);
std::swap(begin, other.begin);
std::swap(end, other.end);
std::swap(links, other.links);
std::swap(events, other.events);
}
OTELSpan& addLink(const SpanContext& linkContext) {
links.push_back(arena, linkContext);
return *this;
}
OTELSpan& addLinks(const std::initializer_list<SpanContext>& linkContexts = {}) {
for (auto const& sc : linkContexts) {
links.push_back(arena, sc);
}
return *this;
}
OTELSpan& addEvent(const OTELEventRef& event) {
events.push_back_deep(arena, event);
return *this;
}
OTELSpan& addEvent(const StringRef& name,
const double& time,
const SmallVectorRef<KeyValueRef>& attrs = SmallVectorRef<KeyValueRef>()) {
return addEvent(OTELEventRef(name, time, attrs));
}
OTELSpan& addAttribute(const StringRef& key, const StringRef& value) {
attributes.push_back_deep(arena, KeyValueRef(key, value));
return *this;
}
Arena arena;
SpanContext context;
Location location;
SpanContext parentContext;
SpanKind kind;
SmallVectorRef<SpanContext> links;
double begin = 0.0, end = 0.0;
SmallVectorRef<KeyValueRef> attributes; // not necessarily sorted
SmallVectorRef<OTELEventRef> events;
SpanStatus status;
};
// The user selects a tracer using a string passed to fdbserver on boot.
// Clients should not refer to TracerType directly, and mappings of names to
// values in this enum can change without notice.
@ -121,6 +311,7 @@ struct ITracer {
virtual TracerType type() const = 0;
// passed ownership to the tracer
virtual void trace(Span const& span) = 0;
virtual void trace(OTELSpan const& span) = 0;
};
void openTracer(TracerType type);
@ -137,3 +328,16 @@ struct SpannedDeque : Deque<T> {
span = std::move(other.span);
}
};
// A Deque<T> that carries an OTELSpan alongside its elements. Movable, not copyable.
template <class T>
struct OTELSpannedDeque : Deque<T> {
	OTELSpan span;

	// Creates an empty deque whose span is constructed from `loc`.
	explicit OTELSpannedDeque(Location loc) : span(loc) {}

	// Moves both the deque contents and the span out of `other`.
	OTELSpannedDeque(OTELSpannedDeque&& other) : Deque<T>(std::move(other)), span(std::move(other.span)) {}

	OTELSpannedDeque(OTELSpannedDeque const&) = delete;
	OTELSpannedDeque& operator=(OTELSpannedDeque const&) = delete;

	// Move-assigns the Deque<T> base first, then the span.
	OTELSpannedDeque& operator=(OTELSpannedDeque&& other) {
		*static_cast<Deque<T>*>(this) = std::move(other);
		span = std::move(other.span);
		// Required: flowing off the end of a function with a non-void return type is undefined behavior.
		return *this;
	}
};