[CUDA] fix codegen for __nvvm_atom_cas_*

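Summary: The __nvvm_atom_cas_* builtins must return the old value stored at the address, not a flag indicating whether the swap succeeded. Codegen previously emitted the success flag; this change makes it emit the old value.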

Reviewers: eliben, tra

Subscribers: jholewinski, llvm-commits

Differential Revision: http://reviews.llvm.org/D13306

llvm-svn: 248951
This commit is contained in:
Jingyue Wu 2015-09-30 21:49:32 +00:00
parent e5e8347496
commit f1eca25b16
2 changed files with 6 additions and 1 deletion

View File

@@ -7021,7 +7021,9 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
 case NVPTX::BI__nvvm_atom_cas_gen_i:
 case NVPTX::BI__nvvm_atom_cas_gen_l:
 case NVPTX::BI__nvvm_atom_cas_gen_ll:
-return MakeAtomicCmpXchgValue(*this, E, true);
+// __nvvm_atom_cas_gen_* should return the old value rather than the
+// success flag.
+return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
 case NVPTX::BI__nvvm_atom_add_gen_f: {
 Value *Ptr = EmitScalarExpr(E->getArg(0));

View File

@@ -260,10 +260,13 @@ __device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
 __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);
 // CHECK: cmpxchg
+// CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
 __nvvm_atom_cas_gen_i(ip, 0, i);
 // CHECK: cmpxchg
+// CHECK-NEXT: extractvalue { {{i32|i64}}, i1 } {{%[0-9]+}}, 0
 __nvvm_atom_cas_gen_l(&dl, 0, l);
 // CHECK: cmpxchg
+// CHECK-NEXT: extractvalue { i64, i1 } {{%[0-9]+}}, 0
 __nvvm_atom_cas_gen_ll(&sll, 0, ll);
 // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32