Reimplement the bittest intrinsic family as builtins with inline asm
We need to implement _interlockedbittestandset as a builtin for windows.h, so we might as well do the whole family. It reduces code duplication anyway.

Fixes PR33188, a long-standing bug in our bittest implementation encountered by Chakra.

llvm-svn: 333978
Commit 1d9c249db5 (parent b6ed992de0)
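For orientation: each _bittest-family intrinsic tests the bit of *base at the given position and, for the and-variants, complements, clears, or sets it, returning the previous bit value as an unsigned char; the _interlocked* forms perform the update as a single atomic read-modify-write. A minimal usage sketch (not part of the patch; `flags` is a hypothetical flag word, semantics as documented for the MSVC intrinsics):

    #include <intrin.h>

    static long flags;

    void demo(void) {
      unsigned char old;
      old = _bittestandset(&flags, 3);            /* set bit 3, return its old value (0) */
      old = _bittest(&flags, 3);                  /* read-only test: now 1 */
      old = _bittestandreset(&flags, 3);          /* clear bit 3; old == 1 */
      old = _interlockedbittestandset(&flags, 3); /* same as set, but one atomic RMW */
      (void)old;
    }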
@@ -744,6 +744,14 @@ BUILTIN(__builtin_rindex, "c*cC*i", "Fn")
 LANGBUILTIN(_alloca, "v*z", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__annotation, "wC*.","n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__assume, "vb", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittest, "UcNiC*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandcomplement, "UcNi*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandreset, "UcNi*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandset, "UcNi*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittest64, "UcWiC*Wi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandcomplement64, "UcWi*Wi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandreset64, "UcWi*Wi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandset64, "UcWi*Wi", "n", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_ushort, "UsUs", "fnc", "stdlib.h", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_ulong, "UNiUNi", "fnc", "stdlib.h", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_uint64, "ULLiULLi", "fnc", "stdlib.h", ALL_MS_LANGUAGES)
@@ -783,7 +791,10 @@ LANGBUILTIN(_InterlockedOr, "NiNiD*Ni", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedXor8, "ccD*c", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedXor16, "ssD*s", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedXor, "NiNiD*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandreset, "UcNiD*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandreset64, "UcWiD*Wi", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_interlockedbittestandset, "UcNiD*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandset64, "UcWiD*Wi", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__noop, "i.", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__popcnt16, "UsUs", "nc", ALL_MS_LANGUAGES)
 LANGBUILTIN(__popcnt, "UiUi", "nc", ALL_MS_LANGUAGES)
@@ -484,6 +484,37 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
 }
 
+static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
+                                   char TestAnd, char Size,
+                                   bool Locked = false) {
+  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+  // Build the assembly.
+  SmallString<64> Asm;
+  raw_svector_ostream AsmOS(Asm);
+  if (Locked)
+    AsmOS << "lock ";
+  AsmOS << "bt";
+  if (TestAnd)
+    AsmOS << TestAnd;
+  AsmOS << Size << " $2, ($1)\n\tsetc ${0:b}";
+
+  // Build the constraints. FIXME: We should support immediates when possible.
+  std::string Constraints = "=r,r,r,~{cc},~{flags},~{memory},~{fpsr}";
+  llvm::IntegerType *IntType = llvm::IntegerType::get(
+      CGF.getLLVMContext(),
+      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
+  llvm::Type *IntPtrType = IntType->getPointerTo();
+  llvm::FunctionType *FTy =
+      llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
+
+  llvm::InlineAsm *IA =
+      llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+  CallSite CS = CGF.Builder.CreateCall(IA, {BitBase, BitPos});
+  return RValue::get(CS.getInstruction());
+}
+
 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
 // handle them here.
 enum class CodeGenFunction::MSVCIntrin {
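To make the template construction concrete: the string is assembled from three knobs, an optional "lock " prefix, the bt/btc/btr/bts mnemonic selected by TestAnd, and an 'l' or 'q' operand-size suffix from Size; the trailing setc then captures the carry flag, where bt leaves the tested bit. A sketch of the resulting templates (they line up with the CHECK lines of the test added below):

    void asm_templates(long *p, long i) {
      _bittest(p, i);                  /* -> "btl $2, ($1)\n\tsetc ${0:b}"       */
      _bittestandcomplement(p, i);     /* -> "btcl $2, ($1)\n\tsetc ${0:b}"      */
      _interlockedbittestandset(p, i); /* -> "lock btsl $2, ($1)\n\tsetc ${0:b}" */
    }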
@@ -497,7 +528,6 @@ enum class CodeGenFunction::MSVCIntrin {
   _InterlockedIncrement,
   _InterlockedOr,
   _InterlockedXor,
-  _interlockedbittestandset,
   __fastfail,
 };
 
@@ -565,22 +595,6 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
   case MSVCIntrin::_InterlockedXor:
     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
 
-  case MSVCIntrin::_interlockedbittestandset: {
-    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-    llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
-    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
-        AtomicRMWInst::Or, Addr,
-        Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
-        llvm::AtomicOrdering::SequentiallyConsistent);
-    // Shift the relevant bit to the least significant position, truncate to
-    // the result type, and test the low bit.
-    llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
-    llvm::Value *Truncated =
-        Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
-    return Builder.CreateAnd(Truncated,
-                             ConstantInt::get(Truncated->getType(), 1));
-  }
-
   case MSVCIntrin::_InterlockedDecrement: {
     llvm::Type *IntTy = ConvertType(E->getType());
     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
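The case removed above lowered _interlockedbittestandset through an atomicrmw-or plus shift/truncate/mask. Its C-level equivalent, shown here as a sketch with a hypothetical helper name, is the same pattern the intrin.h fallbacks deleted below used:

    /* C equivalent of the removed atomicrmw-based lowering. Note the shifts are
       undefined for bit positions at or beyond the width of long; the inline-asm
       lowering has no such limit. */
    unsigned char old_interlockedbittestandset(volatile long *p, long bit) {
      long prev = __atomic_fetch_or(p, 1l << bit, __ATOMIC_SEQ_CST);
      return (prev >> bit) & 1;
    }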
@@ -2791,9 +2805,32 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   case Builtin::BI_InterlockedXor16:
   case Builtin::BI_InterlockedXor:
     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
+
+  case Builtin::BI_bittest:
+    return EmitBitTestIntrinsic(*this, E, '\0', 'l');
+  case Builtin::BI_bittestandcomplement:
+    return EmitBitTestIntrinsic(*this, E, 'c', 'l');
+  case Builtin::BI_bittestandreset:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'l');
+  case Builtin::BI_bittestandset:
+    return EmitBitTestIntrinsic(*this, E, 's', 'l');
+  case Builtin::BI_interlockedbittestandreset:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'l', /*Locked=*/true);
   case Builtin::BI_interlockedbittestandset:
-    return RValue::get(
-        EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
+    return EmitBitTestIntrinsic(*this, E, 's', 'l', /*Locked=*/true);
+
+  case Builtin::BI_bittest64:
+    return EmitBitTestIntrinsic(*this, E, '\0', 'q');
+  case Builtin::BI_bittestandcomplement64:
+    return EmitBitTestIntrinsic(*this, E, 'c', 'q');
+  case Builtin::BI_bittestandreset64:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'q');
+  case Builtin::BI_bittestandset64:
+    return EmitBitTestIntrinsic(*this, E, 's', 'q');
+  case Builtin::BI_interlockedbittestandreset64:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'q', /*Locked=*/true);
+  case Builtin::BI_interlockedbittestandset64:
+    return EmitBitTestIntrinsic(*this, E, 's', 'q', /*Locked=*/true);
 
   case Builtin::BI__exception_code:
   case Builtin::BI_exception_code:
@@ -161,13 +161,9 @@ static __inline__
 unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
 static __inline__
 unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
-static __inline__
 unsigned char _bittest(long const *, long);
-static __inline__
 unsigned char _bittestandcomplement(long *, long);
-static __inline__
 unsigned char _bittestandreset(long *, long);
-static __inline__
 unsigned char _bittestandset(long *, long);
 void __cdecl _disable(void);
 void __cdecl _enable(void);
@@ -260,20 +256,15 @@ static __inline__
 unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
 static __inline__
 unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
-static __inline__
 unsigned char _bittest64(__int64 const *, __int64);
-static __inline__
 unsigned char _bittestandcomplement64(__int64 *, __int64);
-static __inline__
 unsigned char _bittestandreset64(__int64 *, __int64);
-static __inline__
 unsigned char _bittestandset64(__int64 *, __int64);
 long _InterlockedAnd_np(long volatile *_Value, long _Mask);
 short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
 __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
 char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
 unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
-static __inline__
 unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
 long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
                                     long _Comparand);
@@ -341,78 +332,6 @@ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
 
 #endif
 
-/*----------------------------------------------------------------------------*\
-|* Bit Counting and Testing
-\*----------------------------------------------------------------------------*/
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittest(long const *_BitBase, long _BitPos) {
-  return (*_BitBase >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandcomplement(long *_BitBase, long _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase ^ (1 << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandreset(long *_BitBase, long _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase & ~(1 << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandset(long *_BitBase, long _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase | (1 << _BitPos);
-  return _Res;
-}
-#if defined(__arm__) || defined(__aarch64__)
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset_acq(long volatile *_BitBase, long _BitPos) {
-  long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_ACQUIRE);
-  return (_PrevVal >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset_nf(long volatile *_BitBase, long _BitPos) {
-  long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELAXED);
-  return (_PrevVal >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset_rel(long volatile *_BitBase, long _BitPos) {
-  long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELEASE);
-  return (_PrevVal >> _BitPos) & 1;
-}
-#endif
-#ifdef __x86_64__
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittest64(__int64 const *_BitBase, __int64 _BitPos) {
-  return (*_BitBase >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase ^ (1ll << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase & ~(1ll << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandset64(__int64 *_BitBase, __int64 _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase | (1ll << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) {
-  long long _PrevVal =
-      __atomic_fetch_or(_BitBase, 1ll << _BitPos, __ATOMIC_SEQ_CST);
-  return (_PrevVal >> _BitPos) & 1;
-}
-#endif
 /*----------------------------------------------------------------------------*\
 |* Interlocked Exchange Add
 \*----------------------------------------------------------------------------*/
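Why the C fallbacks above had to go: the x86 bt family with a register bit offset treats the memory operand as the base of a flat bit string, so the offset may legitimately exceed the width of one element and select a neighboring word. The removed implementations only shifted within a single long or __int64, which is presumably the PR33188-class breakage Chakra hit. An illustration (assumes 32-bit long, as on Windows):

    long flags[2] = {0, 0};

    void bit37(void) {
      /* With the bt-based lowering, offset 37 addresses bit 5 of flags[1]
         (37 = 32 + 5). The old C fallback instead computed (1 << 37) on a
         32-bit value, which is undefined, and never touched flags[1]. */
      _bittestandset(&flags[0], 37);
    }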
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s
+
+volatile unsigned char sink = 0;
+void test32(long *base, long idx) {
+  sink = _bittest(base, idx);
+  sink = _bittestandcomplement(base, idx);
+  sink = _bittestandreset(base, idx);
+  sink = _bittestandset(base, idx);
+  sink = _interlockedbittestandreset(base, idx);
+  sink = _interlockedbittestandset(base, idx);
+}
+void test64(__int64 *base, __int64 idx) {
+  sink = _bittest64(base, idx);
+  sink = _bittestandcomplement64(base, idx);
+  sink = _bittestandreset64(base, idx);
+  sink = _bittestandset64(base, idx);
+  sink = _interlockedbittestandreset64(base, idx);
+  sink = _interlockedbittestandset64(base, idx);
+}
+
+// CHECK-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
+// CHECK: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+
+// CHECK-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
+// CHECK: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
@@ -148,14 +148,3 @@ LONG test_InterlockedDecrement(LONG volatile *Addend) {
 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
-
-unsigned char test_interlockedbittestandset(volatile LONG *ptr, LONG bit) {
-  return _interlockedbittestandset(ptr, bit);
-}
-// CHECK-LABEL: define{{.*}} i8 @test_interlockedbittestandset
-// CHECK: [[MASKBIT:%[0-9]+]] = shl i32 1, %bit
-// CHECK: [[OLD:%[0-9]+]] = atomicrmw or i32* %ptr, i32 [[MASKBIT]] seq_cst
-// CHECK: [[SHIFT:%[0-9]+]] = lshr i32 [[OLD]], %bit
-// CHECK: [[TRUNC:%[0-9]+]] = trunc i32 [[SHIFT]] to i8
-// CHECK: [[AND:%[0-9]+]] = and i8 [[TRUNC]], 1
-// CHECK: ret i8 [[AND]]
@@ -455,17 +455,6 @@ __int64 test_InterlockedDecrement64(__int64 volatile *Addend) {
 
 #endif
 
-unsigned char test_interlockedbittestandset(volatile long *ptr, long bit) {
-  return _interlockedbittestandset(ptr, bit);
-}
-// CHECK-LABEL: define{{.*}} i8 @test_interlockedbittestandset
-// CHECK: [[MASKBIT:%[0-9]+]] = shl i32 1, %bit
-// CHECK: [[OLD:%[0-9]+]] = atomicrmw or i32* %ptr, i32 [[MASKBIT]] seq_cst
-// CHECK: [[SHIFT:%[0-9]+]] = lshr i32 [[OLD]], %bit
-// CHECK: [[TRUNC:%[0-9]+]] = trunc i32 [[SHIFT]] to i8
-// CHECK: [[AND:%[0-9]+]] = and i8 [[TRUNC]], 1
-// CHECK: ret i8 [[AND]]
-
 void test__fastfail() {
   __fastfail(42);
 }