/*
 * FastAlloc.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef FLOW_FASTALLOC_H
#define FLOW_FASTALLOC_H
#pragma once
#include "flow/Error.h"
#include "flow/Platform.h"
#include "flow/config.h"
// ALLOC_INSTRUMENTATION_STDOUT enables non-sampled logging of all allocations and deallocations to stdout to be
// processed by tools/alloc_instrumentation.py
//#define ALLOC_INSTRUMENTATION_STDOUT ENABLED(NOT_IN_CLEAN)
//#define ALLOC_INSTRUMENTATION ENABLED(NOT_IN_CLEAN)
// The form "(1==1)" in this context is used to satisfy both clang and vc++ with a single syntax. Clang rejects "1" and
// vc++ rejects "true".
#define FASTALLOC_THREAD_SAFE (FLOW_THREAD_SAFE || (1 == 1))
#if VALGRIND
#include <drd.h>
#include <memcheck.h>
bool valgrindPrecise();
#endif
#include "flow/Hash3.h"
#include <assert.h>
#include <atomic>
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <unordered_map>
#if defined(ALLOC_INSTRUMENTATION) && defined(__linux__)
#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#endif
#ifdef ALLOC_INSTRUMENTATION
#include <map>
#include <algorithm>
#include "flow/ThreadPrimitives.h"
// Per-tag allocation counters used when ALLOC_INSTRUMENTATION is enabled.
// Aggregate-initialized to zero via allocInstr's operator[].
struct AllocInstrInfo {
	int64_t allocCount; // total allocations recorded for this tag
	int64_t deallocCount; // total deallocations recorded for this tag
	int64_t maxAllocated; // high-water mark of live objects (allocCount - deallocCount)
	// Record `count` allocations and update the high-water mark.
	inline void alloc(int64_t count = 1) {
		allocCount += count;
		maxAllocated = std::max(allocCount - deallocCount, maxAllocated);
	}
	// Record `count` deallocations. The high-water mark is intentionally left untouched.
	inline void dealloc(int64_t count = 1) { deallocCount += count; }
};
extern std::map<const char*, AllocInstrInfo> allocInstr;
#define INSTRUMENT_ALLOCATE(name) (allocInstr[(name)].alloc())
#define INSTRUMENT_RELEASE(name) (allocInstr[(name)].dealloc())
// extern std::map<uint32_t, uint64_t> stackAllocations;
// maps from an address to the hash of the backtrace and the size of the allocation
extern std::unordered_map<int64_t, std::pair<uint32_t, size_t>> memSample;
// Aggregated accounting for one unique allocation backtrace (keyed by its hash
// in backTraceLookup).
struct BackTraceAccount {
	double count; // estimated total allocations, extrapolated from sampling
	size_t sampleCount; // number of sampled allocations actually observed
	size_t totalSize; // total bytes currently attributed to this backtrace
	// Captured stack frames. NOTE(review): raw pointer — ownership appears to be
	// managed by the backTraceLookup bookkeeping; confirm before freeing here.
	std::vector<void*>* backTrace;
};
// maps from a hash of a backtrace to a backtrace and the total size of data currently allocated from this stack
extern std::unordered_map<uint32_t, BackTraceAccount> backTraceLookup;
extern ThreadSpinLock memLock;
extern thread_local bool memSample_entered;
extern const size_t SAMPLE_BYTES;
#else
#define INSTRUMENT_ALLOCATE(name)
#define INSTRUMENT_RELEASE(name)
#endif
#if defined(ALLOC_INSTRUMENTATION) || defined(ALLOC_INSTRUMENTATION_STDOUT)
void recordAllocation(void* ptr, size_t size);
void recordDeallocation(void* ptr);
#endif
// Bytes in one "magazine" (the unit in which threads acquire/release batches of items).
inline constexpr auto kFastAllocMagazineBytes = 128 << 10;

// Fixed-size-block allocator: serves Size-byte items from a per-thread freelist
// (refilled a magazine at a time from shared global state). All members are
// static; the class is never instantiated.
template <int Size>
class FastAllocator {
public:
	[[nodiscard]] static void* allocate();
	static void release(void* ptr);
	// Debug hook: validate ptr against allocator bookkeeping; `alloc` indicates
	// whether this is the allocation or the release side.
	static void check(void* ptr, bool alloc);

	static long long getTotalMemory();
	static long long getApproximateMemoryUnused();
	static long long getActiveThreads();

#ifdef ALLOC_INSTRUMENTATION
	static volatile int32_t pageCount;
#endif

	FastAllocator() = delete;

private:
#ifdef VALGRIND
	static unsigned long vLock;
#endif

	// Number of Size-byte items per magazine.
	static const int magazine_size = kFastAllocMagazineBytes / Size;
	// Item size expressed in pointer-sized words.
	static const int PSize = Size / sizeof(void*);
	struct GlobalData;
	struct ThreadData {
		void* freelist;
		int count; // there are count items on freelist
		void* alternate; // alternate is either a full magazine, or an empty one
		ThreadData();
		~ThreadData();
	};
	struct ThreadDataInit {
		ThreadDataInit() { threadData(); }
	};
	// Used to try to initialize threadData as early as possible. It's still
	// possible that a static thread local variable (that owns fast-allocated
	// memory) could be constructed before threadData, in which case threadData
	// would be destroyed by the time that variable's destructor attempts to free.
	// This is undefined behavior if this happens, which is why we want to
	// initialize threadData as early as possible.
	static thread_local ThreadDataInit threadDataInit;
	// Used to access threadData. Returning a reference to a function-level
	// static guarantees that threadData will be constructed before it's
	// accessed here. Furthermore, if accessing threadData from a static thread
	// local variable's constructor, this guarantees that threadData will
	// outlive this object, since destruction order is the reverse of
	// construction order.
	static ThreadData& threadData() noexcept;
	static GlobalData* globalData() noexcept {
#ifdef VALGRIND
		ANNOTATE_RWLOCK_ACQUIRED(vLock, 1);
#endif
		static GlobalData* data = new GlobalData(); // This is thread-safe as of c++11 (VS 2015, gcc 4.8, clang 3.3)
#ifdef VALGRIND
		ANNOTATE_RWLOCK_RELEASED(vLock, 1);
#endif
		return data;
	}
	static void* freelist;

	static void getMagazine();
	static void releaseMagazine(void*);
};
extern std::atomic<int64_t> g_hugeArenaMemory;
void hugeArenaSample(int size);
void releaseAllThreadMagazines();
int64_t getTotalUnusedAllocatedMemory();
// Round a requested byte count up to the next size class used by FastAllocated
// and the fast-allocation helpers. Input must be in (0, 16384].
inline constexpr int nextFastAllocatedSize(int x) {
	assert(x > 0 && x <= 16384);
	// The supported size classes, smallest to largest.
	constexpr int sizeClasses[] = { 16, 32, 64, 96, 128, 256, 512, 1024, 2048, 4096, 8192, 16384 };
	for (int sc : sizeClasses) {
		if (x <= sc)
			return sc;
	}
	return 16384; // unreachable while the assert above holds
}
template <class Object>
class FastAllocated {
public:
2019-08-10 03:55:21 +08:00
[[nodiscard]] static void* operator new(size_t s) {
if (s != sizeof(Object))
abort();
2017-05-26 04:48:44 +08:00
INSTRUMENT_ALLOCATE(typeid(Object).name());
if constexpr (sizeof(Object) <= 256) {
void* p = FastAllocator < sizeof(Object) <= 64 ? 64 : nextFastAllocatedSize(sizeof(Object)) > ::allocate();
return p;
} else {
void* p = new uint8_t[nextFastAllocatedSize(sizeof(Object))];
return p;
}
2017-05-26 04:48:44 +08:00
}
static void operator delete(void* s) {
INSTRUMENT_RELEASE(typeid(Object).name());
if constexpr (sizeof(Object) <= 256) {
FastAllocator<sizeof(Object) <= 64 ? 64 : nextFastAllocatedSize(sizeof(Object))>::release(s);
} else {
delete[] reinterpret_cast<uint8_t*>(s);
}
2017-05-26 04:48:44 +08:00
}
// Redefine placement new so you can still use it
static void* operator new(size_t, void* p) { return p; }
static void operator delete(void*, void*) {}
2017-05-26 04:48:44 +08:00
};
// Allocate `size` bytes from the smallest FastAllocator size class that fits;
// requests over 256 bytes come from the global heap. Free with
// freeFast(size, ptr), passing the same size.
[[nodiscard]] inline void* allocateFast(int size) {
	if (size <= 16)
		return FastAllocator<16>::allocate();
	if (size <= 32)
		return FastAllocator<32>::allocate();
	if (size <= 64)
		return FastAllocator<64>::allocate();
	if (size <= 96)
		return FastAllocator<96>::allocate();
	if (size <= 128)
		return FastAllocator<128>::allocate();
	if (size <= 256)
		return FastAllocator<256>::allocate();
	return new uint8_t[size];
}
// Release memory obtained from allocateFast(size). `size` must match the value
// passed at allocation so the same size class is chosen.
inline void freeFast(int size, void* ptr) {
	if (size <= 16)
		return FastAllocator<16>::release(ptr);
	if (size <= 32)
		return FastAllocator<32>::release(ptr);
	if (size <= 64)
		return FastAllocator<64>::release(ptr);
	if (size <= 96)
		return FastAllocator<96>::release(ptr);
	if (size <= 128)
		return FastAllocator<128>::release(ptr);
	if (size <= 256)
		return FastAllocator<256>::release(ptr);
	delete[](uint8_t*) ptr;
}
// Allocate a block of memory aligned to 4096 bytes. Size must be a multiple of
// 4096. Guaranteed not to return null. Use freeFast4kAligned to free.
[[nodiscard]] inline void* allocateFast4kAligned(int size) {
#if !defined(USE_JEMALLOC)
	// Use FastAllocator for sizes it supports to avoid internal fragmentation in some implementations of aligned_alloc
	if (size <= 4096)
		return FastAllocator<4096>::allocate();
	if (size <= 8192)
		return FastAllocator<8192>::allocate();
	if (size <= 16384)
		return FastAllocator<16384>::allocate();
#endif
	// aligned_alloc requires size to be a multiple of the alignment, which the
	// contract above guarantees.
	auto* result = aligned_alloc(4096, size);
	if (result == nullptr) {
		// NOTE(review): outOfMemory() is expected not to return; otherwise a
		// nullptr would escape despite the non-null guarantee — confirm.
		platform::outOfMemory();
	}
	return result;
}
// Free a pointer returned from allocateFast4kAligned(size). `size` must match
// the value passed at allocation so the same release path is chosen.
inline void freeFast4kAligned(int size, void* ptr) {
#if !defined(USE_JEMALLOC)
	// Sizes supported by FastAllocator must be released via FastAllocator
	if (size <= 4096)
		return FastAllocator<4096>::release(ptr);
	if (size <= 8192)
		return FastAllocator<8192>::release(ptr);
	if (size <= 16384)
		return FastAllocator<16384>::release(ptr);
#endif
	aligned_free(ptr);
}
#endif