forked from OSchip/llvm-project
Improve C11 atomics support:
- Generate atomicrmw operations in most of the cases when it's sensible to do so. - Don't crash in several common cases (and hopefully don't crash in more of them). - Add some better tests. We now generate significantly better code for things like: _Atomic(int) x; ... x++; On MIPS, this now generates a 4-instruction ll/sc loop, where previously it generated about 30 instructions in two nested loops. On x86-64, we generate a single lock incl, instead of a lock cmpxchgl loop (one instruction instead of ten). llvm-svn: 176420
This commit is contained in:
parent
e0a5c0d914
commit
ef78c305fa
|
@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
|
|||
bool isInc, bool isPre) {
|
||||
|
||||
QualType type = E->getSubExpr()->getType();
|
||||
llvm::Value *value = EmitLoadOfLValue(LV);
|
||||
llvm::Value *input = value;
|
||||
llvm::PHINode *atomicPHI = 0;
|
||||
llvm::Value *value;
|
||||
llvm::Value *input;
|
||||
|
||||
int amount = (isInc ? 1 : -1);
|
||||
|
||||
if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
|
||||
type = atomicTy->getValueType();
|
||||
if (isInc && type->isBooleanType()) {
|
||||
llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
|
||||
if (isPre) {
|
||||
Builder.Insert(new llvm::StoreInst(True,
|
||||
LV.getAddress(), LV.isVolatileQualified(),
|
||||
LV.getAlignment().getQuantity(),
|
||||
llvm::SequentiallyConsistent));
|
||||
return Builder.getTrue();
|
||||
}
|
||||
// For atomic bool increment, we just store true and return it for
|
||||
// preincrement, do an atomic swap with true for postincrement
|
||||
return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
|
||||
LV.getAddress(), True, llvm::SequentiallyConsistent);
|
||||
}
|
||||
// Special case for atomic increment / decrement on integers, emit
|
||||
// atomicrmw instructions. We skip this if we want to be doing overflow
|
||||
// checking, and fall into the slow path with the atomic cmpxchg loop.
|
||||
if (!type->isBooleanType() && type->isIntegerType() &&
|
||||
!(type->isUnsignedIntegerType() &&
|
||||
CGF.SanOpts->UnsignedIntegerOverflow) &&
|
||||
CGF.getLangOpts().getSignedOverflowBehavior() !=
|
||||
LangOptions::SOB_Trapping) {
|
||||
llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
|
||||
llvm::AtomicRMWInst::Sub;
|
||||
llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
|
||||
llvm::Instruction::Sub;
|
||||
llvm::Value *amt = CGF.EmitToMemory(
|
||||
llvm::ConstantInt::get(ConvertType(type), 1, true), type);
|
||||
llvm::Value *old = Builder.CreateAtomicRMW(aop,
|
||||
LV.getAddress(), amt, llvm::SequentiallyConsistent);
|
||||
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
|
||||
}
|
||||
value = EmitLoadOfLValue(LV);
|
||||
input = value;
|
||||
// For every other atomic operation, we need to emit a load-op-cmpxchg loop
|
||||
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
|
||||
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
|
||||
value = CGF.EmitToMemory(value, type);
|
||||
Builder.CreateBr(opBB);
|
||||
Builder.SetInsertPoint(opBB);
|
||||
atomicPHI = Builder.CreatePHI(value->getType(), 2);
|
||||
atomicPHI->addIncoming(value, startBB);
|
||||
type = atomicTy->getValueType();
|
||||
value = atomicPHI;
|
||||
} else {
|
||||
value = EmitLoadOfLValue(LV);
|
||||
input = value;
|
||||
}
|
||||
|
||||
// Special case of integer increment that we have to check first: bool++.
|
||||
|
@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
|
|||
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
|
||||
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
|
||||
llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
|
||||
value, llvm::SequentiallyConsistent);
|
||||
CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
|
||||
atomicPHI->addIncoming(old, opBB);
|
||||
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
|
||||
Builder.CreateCondBr(success, contBB, opBB);
|
||||
|
@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
|
|||
OpInfo.E = E;
|
||||
// Load/convert the LHS.
|
||||
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
|
||||
OpInfo.LHS = EmitLoadOfLValue(LHSLV);
|
||||
|
||||
llvm::PHINode *atomicPHI = 0;
|
||||
if (LHSTy->isAtomicType()) {
|
||||
if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
|
||||
QualType type = atomicTy->getValueType();
|
||||
if (!type->isBooleanType() && type->isIntegerType() &&
|
||||
!(type->isUnsignedIntegerType() &&
|
||||
CGF.SanOpts->UnsignedIntegerOverflow) &&
|
||||
CGF.getLangOpts().getSignedOverflowBehavior() !=
|
||||
LangOptions::SOB_Trapping) {
|
||||
llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
|
||||
switch (OpInfo.Opcode) {
|
||||
// We don't have atomicrmw operands for *, %, /, <<, >>
|
||||
case BO_MulAssign: case BO_DivAssign:
|
||||
case BO_RemAssign:
|
||||
case BO_ShlAssign:
|
||||
case BO_ShrAssign:
|
||||
break;
|
||||
case BO_AddAssign:
|
||||
aop = llvm::AtomicRMWInst::Add;
|
||||
break;
|
||||
case BO_SubAssign:
|
||||
aop = llvm::AtomicRMWInst::Sub;
|
||||
break;
|
||||
case BO_AndAssign:
|
||||
aop = llvm::AtomicRMWInst::And;
|
||||
break;
|
||||
case BO_XorAssign:
|
||||
aop = llvm::AtomicRMWInst::Xor;
|
||||
break;
|
||||
case BO_OrAssign:
|
||||
aop = llvm::AtomicRMWInst::Or;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Invalid compound assignment type");
|
||||
}
|
||||
if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
|
||||
llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
|
||||
E->getRHS()->getType(), LHSTy), LHSTy);
|
||||
Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
|
||||
llvm::SequentiallyConsistent);
|
||||
return LHSLV;
|
||||
}
|
||||
}
|
||||
// FIXME: For floating point types, we should be saving and restoring the
|
||||
// floating point environment in the loop.
|
||||
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
|
||||
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
|
||||
OpInfo.LHS = EmitLoadOfLValue(LHSLV);
|
||||
OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
|
||||
Builder.CreateBr(opBB);
|
||||
Builder.SetInsertPoint(opBB);
|
||||
atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
|
||||
atomicPHI->addIncoming(OpInfo.LHS, startBB);
|
||||
OpInfo.LHS = atomicPHI;
|
||||
}
|
||||
else
|
||||
OpInfo.LHS = EmitLoadOfLValue(LHSLV);
|
||||
|
||||
OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
|
||||
E->getComputationLHSType());
|
||||
|
@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
|
|||
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
|
||||
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
|
||||
llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
|
||||
Result, llvm::SequentiallyConsistent);
|
||||
CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
|
||||
atomicPHI->addIncoming(old, opBB);
|
||||
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
|
||||
Builder.CreateCondBr(success, contBB, opBB);
|
||||
|
|
|
@ -15,9 +15,4 @@ void foo(int x)
|
|||
// CHECK: sdiv i32
|
||||
// CHECK: cmpxchg i16*
|
||||
|
||||
// These should be emitting atomicrmw instructions, but they aren't yet
|
||||
i += 2; // CHECK: cmpxchg
|
||||
i -= 2; // CHECK: cmpxchg
|
||||
i++; // CHECK: cmpxchg
|
||||
i--; // CHECK: cmpxchg
|
||||
}
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
|
||||
|
||||
// Test that we are generating atomicrmw instructions, rather than
|
||||
// compare-exchange loops for common atomic ops. This makes a big difference
|
||||
// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
|
||||
// the load and then another ll/sc in the loop, expanding to about 30
|
||||
// instructions when it should be only 4. It has a smaller, but still
|
||||
// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
|
||||
// RMW instructions.
|
||||
//
|
||||
// We currently emit cmpxchg loops for most operations on _Bools, because
|
||||
// they're sufficiently rare that it's not worth making sure that the semantics
|
||||
// are correct.
|
||||
|
||||
typedef int __attribute__((vector_size(16))) vector;
|
||||
|
||||
_Atomic(_Bool) b;
|
||||
_Atomic(int) i;
|
||||
_Atomic(long long) l;
|
||||
_Atomic(short) s;
|
||||
_Atomic(char*) p;
|
||||
_Atomic(float) f;
|
||||
_Atomic(vector) v;
|
||||
|
||||
// CHECK-NOT: cmpxchg
|
||||
|
||||
// CHECK: testinc
|
||||
void testinc(void)
|
||||
{
|
||||
// Special case for suffix bool++, sets to true and returns the old value.
|
||||
// CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
|
||||
b++;
|
||||
// CHECK: atomicrmw add i32* @i, i32 1 seq_cst
|
||||
i++;
|
||||
// CHECK: atomicrmw add i64* @l, i64 1 seq_cst
|
||||
l++;
|
||||
// CHECK: atomicrmw add i16* @s, i16 1 seq_cst
|
||||
s++;
|
||||
// Prefix increment
|
||||
// Special case for bool: set to true and return true
|
||||
// CHECK: store atomic i8 1, i8* @b seq_cst, align 1
|
||||
++b;
|
||||
// Currently, we have no variant of atomicrmw that returns the new value, so
|
||||
// we have to generate an atomic add, which returns the old value, and then a
|
||||
// non-atomic add.
|
||||
// CHECK: atomicrmw add i32* @i, i32 1 seq_cst
|
||||
// CHECK: add i32
|
||||
++i;
|
||||
// CHECK: atomicrmw add i64* @l, i64 1 seq_cst
|
||||
// CHECK: add i64
|
||||
++l;
|
||||
// CHECK: atomicrmw add i16* @s, i16 1 seq_cst
|
||||
// CHECK: add i16
|
||||
++s;
|
||||
}
|
||||
// CHECK: testdec
|
||||
void testdec(void)
|
||||
{
|
||||
// CHECK: cmpxchg i8* @b
|
||||
b--;
|
||||
// CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
|
||||
i--;
|
||||
// CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
|
||||
l--;
|
||||
// CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
|
||||
s--;
|
||||
// CHECK: cmpxchg i8* @b
|
||||
--b;
|
||||
// CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
|
||||
// CHECK: sub i32
|
||||
--i;
|
||||
// CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
|
||||
// CHECK: sub i64
|
||||
--l;
|
||||
// CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
|
||||
// CHECK: sub i16
|
||||
--s;
|
||||
}
|
||||
// CHECK: testaddeq
|
||||
void testaddeq(void)
|
||||
{
|
||||
// CHECK: cmpxchg i8* @b
|
||||
// CHECK: atomicrmw add i32* @i, i32 42 seq_cst
|
||||
// CHECK: atomicrmw add i64* @l, i64 42 seq_cst
|
||||
// CHECK: atomicrmw add i16* @s, i16 42 seq_cst
|
||||
b += 42;
|
||||
i += 42;
|
||||
l += 42;
|
||||
s += 42;
|
||||
}
|
||||
// CHECK: testsubeq
|
||||
void testsubeq(void)
|
||||
{
|
||||
// CHECK: cmpxchg i8* @b
|
||||
// CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
|
||||
// CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
|
||||
// CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
|
||||
b -= 42;
|
||||
i -= 42;
|
||||
l -= 42;
|
||||
s -= 42;
|
||||
}
|
||||
// CHECK: testxoreq
|
||||
void testxoreq(void)
|
||||
{
|
||||
// CHECK: cmpxchg i8* @b
|
||||
// CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
|
||||
// CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
|
||||
// CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
|
||||
b ^= 42;
|
||||
i ^= 42;
|
||||
l ^= 42;
|
||||
s ^= 42;
|
||||
}
|
||||
// CHECK: testoreq
|
||||
void testoreq(void)
|
||||
{
|
||||
// CHECK: cmpxchg i8* @b
|
||||
// CHECK: atomicrmw or i32* @i, i32 42 seq_cst
|
||||
// CHECK: atomicrmw or i64* @l, i64 42 seq_cst
|
||||
// CHECK: atomicrmw or i16* @s, i16 42 seq_cst
|
||||
b |= 42;
|
||||
i |= 42;
|
||||
l |= 42;
|
||||
s |= 42;
|
||||
}
|
||||
// CHECK: testandeq
|
||||
void testandeq(void)
|
||||
{
|
||||
// CHECK: cmpxchg i8* @b
|
||||
// CHECK: atomicrmw and i32* @i, i32 42 seq_cst
|
||||
// CHECK: atomicrmw and i64* @l, i64 42 seq_cst
|
||||
// CHECK: atomicrmw and i16* @s, i16 42 seq_cst
|
||||
b &= 42;
|
||||
i &= 42;
|
||||
l &= 42;
|
||||
s &= 42;
|
||||
}
|
||||
|
Loading…
Reference in New Issue