[scudo] PRNG makeover

Summary:
This follows the addition of `GetRandom` with D34412. We remove our
`/dev/urandom` code and use the new function. Additionally, change the PRNG for
a slightly faster version. One of the issues with the old code is that we have
64 full bits of randomness per "next", using only 8 of those for the Salt and
discarding the rest. So we add a cached u64 in the PRNG that can serve up to
8 u8 before having to call the "next" function again.

During some integration work, I also realized that some very early processes
(like `init`) do not benefit from `/dev/urandom` yet. So if there is no
`getrandom` syscall as well, we have to fallback to some sort of initialization
of the PRNG.

Now a few words on why XoRoShiRo and not something else. I have played a while
with various PRNGs on 32 & 64 bit platforms. Some results are below. LCG 32 & 64
are usually faster but produce respectively 15 & 31 bits of entropy, meaning
that to get a full 64-bit, you would need to call them several times. The simple
XorShift is fast, produces 32 bits but is mediocre with regard to PRNG test
suites, PCG is slower overall, and XoRoShiRo is faster than XorShift128+ and
produces full 64 bits.

%%%
root@tulip-chiphd:/data # ./randtest.arm
[+] starting xs32...
[?] xs32 duration: 22431833053ns
[+] starting lcg32...
[?] lcg32 duration: 14941402090ns
[+] starting pcg32...
[?] pcg32 duration: 44941973771ns
[+] starting xs128p...
[?] xs128p duration: 48889786981ns
[+] starting lcg64...
[?] lcg64 duration: 33831042391ns
[+] starting xos128p...
[?] xos128p duration: 44850878605ns

root@tulip-chiphd:/data # ./randtest.aarch64
[+] starting xs32...
[?] xs32 duration: 22425151678ns
[+] starting lcg32...
[?] lcg32 duration: 14954255257ns
[+] starting pcg32...
[?] pcg32 duration: 37346265726ns
[+] starting xs128p...
[?] xs128p duration: 22523807219ns
[+] starting lcg64...
[?] lcg64 duration: 26141304679ns
[+] starting xos128p...
[?] xos128p duration: 14937033215ns
%%%

Reviewers: alekseyshl

Reviewed By: alekseyshl

Subscribers: aemerson, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D35221

llvm-svn: 307798
This commit is contained in:
Kostya Kortchinsky 2017-07-12 15:29:08 +00:00
parent a63981aaa9
commit 00582563be
4 changed files with 55 additions and 56 deletions

View File

@ -264,7 +264,7 @@ ScudoQuarantineCache *getQuarantineCache(ScudoThreadContext *ThreadContext) {
ScudoQuarantineCache *>(ThreadContext->QuarantineCachePlaceHolder);
}
Xorshift128Plus *getPrng(ScudoThreadContext *ThreadContext) {
ScudoPrng *getPrng(ScudoThreadContext *ThreadContext) {
return &ThreadContext->Prng;
}
@ -283,7 +283,7 @@ struct ScudoAllocator {
StaticSpinMutex FallbackMutex;
AllocatorCache FallbackAllocatorCache;
ScudoQuarantineCache FallbackQuarantineCache;
Xorshift128Plus FallbackPrng;
ScudoPrng FallbackPrng;
bool DeallocationTypeMismatch;
bool ZeroContents;
@ -333,8 +333,8 @@ struct ScudoAllocator {
static_cast<uptr>(Options.QuarantineSizeMb) << 20,
static_cast<uptr>(Options.ThreadLocalQuarantineSizeKb) << 10);
BackendAllocator.InitCache(&FallbackAllocatorCache);
FallbackPrng.initFromURandom();
Cookie = FallbackPrng.getNext();
FallbackPrng.init();
Cookie = FallbackPrng.getU64();
}
// Helper function that checks for a valid Scudo chunk. nullptr isn't.
@ -373,19 +373,19 @@ struct ScudoAllocator {
bool FromPrimary = PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment);
void *Ptr;
uptr Salt;
u8 Salt;
uptr AllocationSize = FromPrimary ? AlignedSize : NeededSize;
uptr AllocationAlignment = FromPrimary ? MinAlignment : Alignment;
ScudoThreadContext *ThreadContext = getThreadContextAndLock();
if (LIKELY(ThreadContext)) {
Salt = getPrng(ThreadContext)->getNext();
Salt = getPrng(ThreadContext)->getU8();
Ptr = BackendAllocator.Allocate(getAllocatorCache(ThreadContext),
AllocationSize, AllocationAlignment,
FromPrimary);
ThreadContext->unlock();
} else {
SpinMutexLock l(&FallbackMutex);
Salt = FallbackPrng.getNext();
Salt = FallbackPrng.getU8();
Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, AllocationSize,
AllocationAlignment, FromPrimary);
}
@ -612,7 +612,7 @@ static void initScudoInternal(const AllocatorOptions &Options) {
void ScudoThreadContext::init() {
getBackendAllocator().InitCache(&Cache);
Prng.initFromURandom();
Prng.init();
memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder));
}

View File

@ -30,7 +30,7 @@ namespace __scudo {
struct ALIGNED(64) ScudoThreadContext : public ScudoThreadContextPlatform {
AllocatorCache Cache;
Xorshift128Plus Prng;
ScudoPrng Prng;
uptr QuarantineCachePlaceHolder[4];
void init();
void commitBack();

View File

@ -123,40 +123,4 @@ bool testCPUFeature(CPUFeature Feature) {
}
#endif // defined(__x86_64__) || defined(__i386__)
// readRetry will attempt to read Count bytes from the Fd specified, and if
// interrupted will retry to read additional bytes to reach Count.
static ssize_t readRetry(int Fd, u8 *Buffer, size_t Count) {
ssize_t AmountRead = 0;
while (static_cast<size_t>(AmountRead) < Count) {
ssize_t Result = read(Fd, Buffer + AmountRead, Count - AmountRead);
if (Result > 0)
AmountRead += Result;
else if (!Result)
break;
else if (errno != EINTR) {
AmountRead = -1;
break;
}
}
return AmountRead;
}
static void fillRandom(u8 *Data, ssize_t Size) {
int Fd = open("/dev/urandom", O_RDONLY);
if (Fd < 0) {
dieWithMessage("ERROR: failed to open /dev/urandom.\n");
}
bool Success = readRetry(Fd, Data, Size) == Size;
close(Fd);
if (!Success) {
dieWithMessage("ERROR: failed to read enough data from /dev/urandom.\n");
}
}
// Seeds the xorshift state with /dev/urandom.
// TODO(kostyak): investigate using getrandom() if available.
void Xorshift128Plus::initFromURandom() {
fillRandom(reinterpret_cast<u8 *>(State), sizeof(State));
}
} // namespace __scudo

View File

@ -36,23 +36,58 @@ enum CPUFeature {
};
bool testCPUFeature(CPUFeature feature);
// Tiny PRNG based on https://en.wikipedia.org/wiki/Xorshift#xorshift.2B
// The state (128 bits) will be stored in thread local storage.
struct Xorshift128Plus {
INLINE u64 rotl(const u64 X, int K) {
return (X << K) | (X >> (64 - K));
}
// XoRoShiRo128+ PRNG (http://xoroshiro.di.unimi.it/).
struct XoRoShiRo128Plus {
public:
void initFromURandom();
u64 getNext() {
u64 x = State[0];
const u64 y = State[1];
State[0] = y;
x ^= x << 23;
State[1] = x ^ y ^ (x >> 17) ^ (y >> 26);
return State[1] + y;
void init() {
if (UNLIKELY(!GetRandom(reinterpret_cast<void *>(State), sizeof(State)))) {
// Early processes (eg: init) do not have /dev/urandom yet, but we still
// have to provide them with some degree of entropy. Not having a secure
// seed is not as problematic for them, as they are less likely to be
// the target of heap based vulnerabilities exploitation attempts.
State[0] = NanoTime();
State[1] = 0;
}
fillCache();
}
u8 getU8() {
if (UNLIKELY(isCacheEmpty()))
fillCache();
const u8 Result = static_cast<u8>(CachedBytes & 0xff);
CachedBytes >>= 8;
CachedBytesAvailable--;
return Result;
}
u64 getU64() { return next(); }
private:
u8 CachedBytesAvailable;
u64 CachedBytes;
u64 State[2];
u64 next() {
const u64 S0 = State[0];
u64 S1 = State[1];
const u64 Result = S0 + S1;
S1 ^= S0;
State[0] = rotl(S0, 55) ^ S1 ^ (S1 << 14);
State[1] = rotl(S1, 36);
return Result;
}
bool isCacheEmpty() {
return CachedBytesAvailable == 0;
}
void fillCache() {
CachedBytes = next();
CachedBytesAvailable = sizeof(CachedBytes);
}
};
typedef XoRoShiRo128Plus ScudoPrng;
} // namespace __scudo
#endif // SCUDO_UTILS_H_