diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ad2e1cf3b4b2..f65230a0f2d1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -484,11 +484,48 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
 }
 
-static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
-                                   char TestAnd, char Size,
-                                   bool Locked = false) {
-  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
-  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+// Get properties of an X86 BT* assembly instruction. The first returned value
+// is the action character code: '\0' for a plain test, or 'c', 'r', or 's' for
+// complement, reset, or set. The second is the size suffix the assembler
+// needs, and the last is whether to add the lock prefix.
+static std::tuple<char, char, bool>
+getBitTestActionSizeAndLocking(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  case Builtin::BI_bittest:
+    return {'\0', 'l', false};
+  case Builtin::BI_bittestandcomplement:
+    return {'c', 'l', false};
+  case Builtin::BI_bittestandreset:
+    return {'r', 'l', false};
+  case Builtin::BI_bittestandset:
+    return {'s', 'l', false};
+  case Builtin::BI_interlockedbittestandreset:
+    return {'r', 'l', /*Locked=*/true};
+  case Builtin::BI_interlockedbittestandset:
+    return {'s', 'l', /*Locked=*/true};
+
+  case Builtin::BI_bittest64:
+    return {'\0', 'q', false};
+  case Builtin::BI_bittestandcomplement64:
+    return {'c', 'q', false};
+  case Builtin::BI_bittestandreset64:
+    return {'r', 'q', false};
+  case Builtin::BI_bittestandset64:
+    return {'s', 'q', false};
+  case Builtin::BI_interlockedbittestandreset64:
+    return {'r', 'q', /*Locked=*/true};
+  case Builtin::BI_interlockedbittestandset64:
+    return {'s', 'q', /*Locked=*/true};
+  }
+  llvm_unreachable("expected only bittest builtins");
+}
+
+static RValue EmitX86BitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+                                      const CallExpr *E, Value *BitBase,
+                                      Value *BitPos) {
+  char Action, Size;
+  bool Locked;
+  std::tie(Action, Size, Locked) = getBitTestActionSizeAndLocking(BuiltinID);
 
   // Build the assembly.
   SmallString<64> Asm;
@@ -496,12 +533,12 @@ static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
   if (Locked)
     AsmOS << "lock ";
   AsmOS << "bt";
-  if (TestAnd)
-    AsmOS << TestAnd;
+  if (Action)
+    AsmOS << Action;
   AsmOS << Size << " $2, ($1)\n\tsetc ${0:b}";
 
   // Build the constraints. FIXME: We should support immediates when possible.
-  std::string Constraints = "=r,r,r,~{cc},~{flags},~{memory},~{fpsr}";
+  std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
   llvm::IntegerType *IntType = llvm::IntegerType::get(
       CGF.getLLVMContext(),
       CGF.getContext().getTypeSize(E->getArg(1)->getType()));
@@ -515,6 +552,97 @@ static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
   return RValue::get(CS.getInstruction());
 }
 
+/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
+/// bits and a bit position and read and optionally modify the bit at that
+/// position. The position index can be arbitrarily large, i.e. it can be
+/// larger than 31 or 63, so we need an indexed load in the general case.
+static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+                                   const CallExpr *E) {
+  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
+  // indexing operation internally. Use them if possible.
+  llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
+  if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+    return EmitX86BitTestIntrinsic(CGF, BuiltinID, E, BitBase, BitPos);
+
+  // Otherwise, use generic code to load one byte and test the bit. Use all but
+  // the bottom three bits as the array index, and the bottom three bits to
+  // form a mask.
+  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
+  Value *ByteIndex = CGF.Builder.CreateAShr(
+      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
+  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
+  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
+                                                 ByteIndex, "bittest.byteaddr"),
+                   CharUnits::One());
+  Value *PosLow =
+      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
+                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
+
+  // The updating instructions will need a mask.
+  Value *Mask = nullptr;
+  if (BuiltinID != Builtin::BI_bittest && BuiltinID != Builtin::BI_bittest64) {
+    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
+                                 "bittest.mask");
+  }
+
+  // Emit a combined atomicrmw load/store operation for the interlocked
+  // intrinsics.
+  Value *OldByte = nullptr;
+  switch (BuiltinID) {
+  case Builtin::BI_interlockedbittestandreset:
+  case Builtin::BI_interlockedbittestandreset64:
+    OldByte = CGF.Builder.CreateAtomicRMW(
+        AtomicRMWInst::And, ByteAddr.getPointer(), CGF.Builder.CreateNot(Mask),
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    break;
+  case Builtin::BI_interlockedbittestandset:
+  case Builtin::BI_interlockedbittestandset64:
+    OldByte = CGF.Builder.CreateAtomicRMW(
+        AtomicRMWInst::Or, ByteAddr.getPointer(), Mask,
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    break;
+  default:
+    break;
+  }
+
+  // Emit a plain load for the non-interlocked intrinsics.
+  if (!OldByte) {
+    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
+    Value *NewByte = nullptr;
+    switch (BuiltinID) {
+    case Builtin::BI_bittest:
+    case Builtin::BI_bittest64:
+      // Don't store anything.
+      break;
+    case Builtin::BI_bittestandcomplement:
+    case Builtin::BI_bittestandcomplement64:
+      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
+      break;
+    case Builtin::BI_bittestandreset:
+    case Builtin::BI_bittestandreset64:
+      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
+      break;
+    case Builtin::BI_bittestandset:
+    case Builtin::BI_bittestandset64:
+      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
+      break;
+    default:
+      llvm_unreachable("non bittest family builtin");
+    }
+    if (NewByte)
+      CGF.Builder.CreateStore(NewByte, ByteAddr);
+  }
+
+  // However we loaded the old byte, either by plain load or atomicrmw, shift
+  // the bit into the low position and mask it to 0 or 1.
+  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
+  return RValue::get(CGF.Builder.CreateAnd(
+      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"));
+}
+
 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
 // handle them here.
 enum class CodeGenFunction::MSVCIntrin {
@@ -2806,31 +2934,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   case Builtin::BI_InterlockedXor:
     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
 
-  case Builtin::BI_bittest:
-    return EmitBitTestIntrinsic(*this, E, '\0', 'l');
-  case Builtin::BI_bittestandcomplement:
-    return EmitBitTestIntrinsic(*this, E, 'c', 'l');
-  case Builtin::BI_bittestandreset:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'l');
-  case Builtin::BI_bittestandset:
-    return EmitBitTestIntrinsic(*this, E, 's', 'l');
-  case Builtin::BI_interlockedbittestandreset:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'l', /*Locked=*/true);
-  case Builtin::BI_interlockedbittestandset:
-    return EmitBitTestIntrinsic(*this, E, 's', 'l', /*Locked=*/true);
-  case Builtin::BI_bittest64:
-    return EmitBitTestIntrinsic(*this, E, '\0', 'q');
+  case Builtin::BI_bittest:
+  case Builtin::BI_bittest64:
   case Builtin::BI_bittestandcomplement64:
-    return EmitBitTestIntrinsic(*this, E, 'c', 'q');
+  case Builtin::BI_bittestandcomplement:
   case Builtin::BI_bittestandreset64:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'q');
+  case Builtin::BI_bittestandreset:
   case Builtin::BI_bittestandset64:
-    return EmitBitTestIntrinsic(*this, E, 's', 'q');
+  case Builtin::BI_bittestandset:
+  case Builtin::BI_interlockedbittestandreset:
   case Builtin::BI_interlockedbittestandreset64:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'q', /*Locked=*/true);
   case Builtin::BI_interlockedbittestandset64:
-    return EmitBitTestIntrinsic(*this, E, 's', 'q', /*Locked=*/true);
+  case Builtin::BI_interlockedbittestandset:
+    return EmitBitTestIntrinsic(*this, BuiltinID, E);
 
   case Builtin::BI__exception_code:
   case Builtin::BI_exception_code:
diff --git a/clang/test/CodeGen/bittest-intrin.c b/clang/test/CodeGen/bittest-intrin.c
index 839d5656a9ed..21a26efa1071 100644
--- a/clang/test/CodeGen/bittest-intrin.c
+++ b/clang/test/CodeGen/bittest-intrin.c
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -fms-extensions -triple thumbv7-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
+// RUN: %clang_cc1 -fms-extensions -triple aarch64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
 
 volatile unsigned char sink = 0;
 void test32(long *base, long idx) {
@@ -18,18 +20,105 @@ void test64(__int64 *base, __int64 idx) {
   sink = _interlockedbittestandset64(base, idx);
 }
 
-// CHECK-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
-// CHECK: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
+// X64: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}}) +// X64: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}}) +// X64: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}}) +// X64: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}}) +// X64: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}}) -// CHECK-LABEL: define dso_local void @test64(i64* %base, i64 %idx) -// CHECK: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) -// CHECK: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) -// CHECK: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) -// CHECK: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) -// CHECK: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) -// CHECK: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) +// X64-LABEL: define dso_local void @test64(i64* %base, i64 %idx) +// X64: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) +// X64: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) +// X64: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) +// X64: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) +// X64: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) +// X64: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}}) + +// ARM-LABEL: define dso_local {{.*}}void @test32(i32* %base, i32 %idx) +// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3 +// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8* +// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]] +// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8 +// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7 +// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1 +// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]] +// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1 +// ARM: store volatile i8 %[[RES]], i8* @sink, align 1 + +// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3 +// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8* +// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]] +// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8 +// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7 +// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]] +// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1 +// ARM: %[[NEWBYTE:[^ ]*]] = xor i8 %[[BYTE]], %[[MASK]] +// ARM store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1 +// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]] +// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1 +// ARM: store volatile i8 %[[RES]], i8* @sink, align 1 + +// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3 +// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8* +// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]] +// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8 +// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[NEWBYTE:[^ ]*]] = and i8 %[[BYTE]], %[[NOTMASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NEWBYTE:[^ ]*]] = or i8 %[[BYTE]], %[[MASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw and i8* %[[BYTEADDR]], i8 %[[NOTMASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw or i8* %[[BYTEADDR]], i8 %[[MASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM-LABEL: define dso_local {{.*}}void @test64(i64* %base, i64 %idx)
+// ARM: %[[IDXHI:[^ ]*]] = ashr i64 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i64* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i64 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i64 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ... the rest is the same, but with i64 instead of i32.
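For reference, the generic (non-x86) lowering above is equivalent to the following standalone C sketch of the non-interlocked _bittestandset variant. The helper name ref_bittestandset and the driver in main are hypothetical and exist only to illustrate the byte-indexed load/update/extract sequence the patch emits; they are not part of the patch or of the MSVC intrinsic API.

```c
#include <stdint.h>
#include <stdio.h>

/* Reference model of the generic _bittestandset lowering:
 * address the bit array byte-by-byte, set the selected bit with a plain
 * (non-atomic) read-modify-write, and return the bit's old value (0 or 1). */
static unsigned char ref_bittestandset(long *base, long idx) {
  uint8_t *bytes = (uint8_t *)base;        /* reinterpret the array as bytes  */
  long byteidx = idx >> 3;                 /* all but the low three bits      */
  uint8_t poslow = (uint8_t)idx & 0x7;     /* low three bits pick the bit     */
  uint8_t mask = (uint8_t)(1u << poslow);  /* 1 << (idx & 7)                  */
  uint8_t oldbyte = bytes[byteidx];        /* plain load                      */
  bytes[byteidx] = (uint8_t)(oldbyte | mask); /* plain store of updated byte  */
  return (oldbyte >> poslow) & 1;          /* shift old bit down and mask it  */
}

int main(void) {
  long bits[4] = {0};
  printf("%d\n", ref_bittestandset(bits, 70)); /* prints 0: bit was clear */
  printf("%d\n", ref_bittestandset(bits, 70)); /* prints 1: bit is now set */
  return 0;
}
```

The interlocked variants perform the same byte update, but with an atomicrmw or/and (sequentially consistent) instead of the separate load and store shown here.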