forked from OSchip/llvm-project
[PowerPC] Support 16-byte lock free atomics on pwr8 and up
Align 16-byte atomic types to 16 bytes on PPC64, making them consistent with GCC. Also enable inlining of 16-byte atomics on non-AIX PPC64 targets. Reviewed By: hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D122377
This commit is contained in:
parent
9fdd25848a
commit
549e118e93
|
@ -81,6 +81,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
IsISA3_0 = true;
|
||||
} else if (Feature == "+isa-v31-instructions") {
|
||||
IsISA3_1 = true;
|
||||
} else if (Feature == "+quadword-atomics") {
|
||||
HasQuadwordAtomics = true;
|
||||
}
|
||||
// TODO: Finish this list and add an assert that we've handled them
|
||||
// all.
|
||||
|
@ -550,6 +552,12 @@ bool PPCTargetInfo::initFeatureMap(
|
|||
Features["isa-v30-instructions"] =
|
||||
llvm::StringSwitch<bool>(CPU).Case("pwr9", true).Default(false);
|
||||
|
||||
Features["quadword-atomics"] =
|
||||
getTriple().isArch64Bit() && llvm::StringSwitch<bool>(CPU)
|
||||
.Case("pwr9", true)
|
||||
.Case("pwr8", true)
|
||||
.Default(false);
|
||||
|
||||
// Power10 includes all the same features as Power9 plus any features specific
|
||||
// to the Power10 core.
|
||||
if (CPU == "pwr10" || CPU == "power10") {
|
||||
|
@ -660,6 +668,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
|
|||
.Case("isa-v207-instructions", IsISA2_07)
|
||||
.Case("isa-v30-instructions", IsISA3_0)
|
||||
.Case("isa-v31-instructions", IsISA3_1)
|
||||
.Case("quadword-atomics", HasQuadwordAtomics)
|
||||
.Default(false);
|
||||
}
|
||||
|
||||
|
|
|
@ -78,6 +78,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
|
|||
bool IsISA2_07 = false;
|
||||
bool IsISA3_0 = false;
|
||||
bool IsISA3_1 = false;
|
||||
bool HasQuadwordAtomics = false;
|
||||
|
||||
protected:
|
||||
std::string ABI;
|
||||
|
@ -439,8 +440,18 @@ public:
|
|||
DataLayout += "-S128-v256:256:256-v512:512:512";
|
||||
resetDataLayout(DataLayout);
|
||||
|
||||
// PPC64 supports atomics up to 8 bytes.
|
||||
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
|
||||
// Newer PPC64 instruction sets support atomics up to 16 bytes.
|
||||
MaxAtomicPromoteWidth = 128;
|
||||
// Baseline PPC64 supports inlining atomics up to 8 bytes.
|
||||
MaxAtomicInlineWidth = 64;
|
||||
}
|
||||
|
||||
/// Raises MaxAtomicInlineWidth to 128 bits when the target OS is not AIX and
/// the "quadword-atomics" feature is enabled; otherwise leaves it unchanged.
void setMaxAtomicWidth() override {
|
||||
// For power8 and up, backend is able to inline 16-byte atomic lock free
|
||||
// code.
|
||||
// TODO: We should allow AIX to inline quadword atomics in the future.
|
||||
if (!getTriple().isOSAIX() && hasFeature("quadword-atomics"))
|
||||
MaxAtomicInlineWidth = 128;
|
||||
}
|
||||
|
||||
BuiltinVaListKind getBuiltinVaListKind() const override {
|
||||
|
|
|
@ -1,25 +1,30 @@
|
|||
// RUN: %clang_cc1 -verify -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
|
||||
// RUN: %clang_cc1 -Werror -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
|
||||
// RUN: FileCheck %s --check-prefixes=PPC,PPC32
|
||||
// RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
|
||||
// RUN: FileCheck %s --check-prefixes=PPC,PPC64
|
||||
// RUN: %clang_cc1 -verify -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
|
||||
// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
|
||||
// RUN: FileCheck %s --check-prefixes=PPC,PPC64
|
||||
// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s \
|
||||
// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,PPC64
|
||||
// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
|
||||
// RUN: FileCheck %s --check-prefixes=PPC,AIX64
|
||||
// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s \
|
||||
// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,AIX64
|
||||
|
||||
// PPC: @c = global i8 0, align 1{{$}}
|
||||
_Atomic(char) c; // expected-no-diagnostics
|
||||
_Atomic(char) c;
|
||||
|
||||
// PPC: @s = global i16 0, align 2{{$}}
|
||||
_Atomic(short) s; // expected-no-diagnostics
|
||||
_Atomic(short) s;
|
||||
|
||||
// PPC: @i = global i32 0, align 4{{$}}
|
||||
_Atomic(int) i; // expected-no-diagnostics
|
||||
_Atomic(int) i;
|
||||
|
||||
// PPC32: @l = global i32 0, align 4{{$}}
|
||||
// PPC64: @l = global i64 0, align 8{{$}}
|
||||
_Atomic(long) l; // expected-no-diagnostics
|
||||
// AIX64: @l = global i64 0, align 8{{$}}
|
||||
_Atomic(long) l;
|
||||
|
||||
// PPC: @ll = global i64 0, align 8{{$}}
|
||||
_Atomic(long long) ll; // expected-no-diagnostics
|
||||
_Atomic(long long) ll;
|
||||
|
||||
typedef struct {
|
||||
char x[8];
|
||||
|
@ -27,11 +32,14 @@ typedef struct {
|
|||
|
||||
// PPC32: @o = global %struct.O zeroinitializer, align 1{{$}}
|
||||
// PPC64: @o = global %struct.O zeroinitializer, align 8{{$}}
|
||||
_Atomic(O) o; // expected-no-diagnostics
|
||||
// AIX64: @o = global %struct.O zeroinitializer, align 8{{$}}
|
||||
_Atomic(O) o;
|
||||
|
||||
typedef struct {
|
||||
char x[16];
|
||||
} Q;
|
||||
|
||||
// PPC: @q = global %struct.Q zeroinitializer, align 1{{$}}
|
||||
_Atomic(Q) q; // expected-no-diagnostics
|
||||
// PPC32: @q = global %struct.Q zeroinitializer, align 1{{$}}
|
||||
// PPC64: @q = global %struct.Q zeroinitializer, align 16{{$}}
|
||||
// AIX64: @q = global %struct.Q zeroinitializer, align 16{{$}}
|
||||
_Atomic(Q) q;
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
|
||||
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-PWR8
|
||||
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
|
||||
// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
|
||||
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
|
||||
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
|
||||
|
||||
typedef struct {
|
||||
char x[16];
|
||||
} Q;
|
||||
|
||||
typedef _Atomic(Q) AtomicQ;
|
||||
|
||||
typedef __int128_t int128_t;
|
||||
|
||||
// PPC64-PWR8-LABEL: @test_load(
|
||||
// PPC64-PWR8: [[TMP3:%.*]] = load atomic i128, i128* [[TMP1:%.*]] acquire, align 16
|
||||
//
|
||||
// PPC64-LABEL: @test_load(
|
||||
// PPC64: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP3:%.*]], i8* noundef [[TMP4:%.*]], i32 noundef signext 2)
|
||||
//
|
||||
Q test_load(AtomicQ *ptr) {
|
||||
// expected-no-diagnostics
|
||||
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
|
||||
// PPC64-PWR8-LABEL: @test_store(
|
||||
// PPC64-PWR8: store atomic i128 [[TMP6:%.*]], i128* [[TMP4:%.*]] release, align 16
|
||||
//
|
||||
// PPC64-LABEL: @test_store(
|
||||
// PPC64: call void @__atomic_store(i64 noundef 16, i8* noundef [[TMP6:%.*]], i8* noundef [[TMP7:%.*]], i32 noundef signext 3)
|
||||
//
|
||||
void test_store(Q val, AtomicQ *ptr) {
|
||||
// expected-no-diagnostics
|
||||
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
// PPC64-PWR8-LABEL: @test_add(
|
||||
// PPC64-PWR8: [[TMP3:%.*]] = atomicrmw add i128* [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
|
||||
//
|
||||
// PPC64-LABEL: @test_add(
|
||||
// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(i8* noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
|
||||
//
|
||||
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
|
||||
// expected-no-diagnostics
|
||||
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
// PPC64-PWR8-LABEL: @test_xchg(
|
||||
// PPC64-PWR8: [[TMP8:%.*]] = atomicrmw xchg i128* [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
|
||||
//
|
||||
// PPC64-LABEL: @test_xchg(
|
||||
// PPC64: call void @__atomic_exchange(i64 noundef 16, i8* noundef [[TMP7:%.*]], i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i32 noundef signext 5)
|
||||
//
|
||||
Q test_xchg(AtomicQ *ptr, Q new) {
|
||||
// expected-no-diagnostics
|
||||
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
// PPC64-PWR8-LABEL: @test_cmpxchg(
|
||||
// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
|
||||
//
|
||||
// PPC64-LABEL: @test_cmpxchg(
|
||||
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
|
||||
//
|
||||
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
|
||||
// expected-no-diagnostics
|
||||
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
// PPC64-PWR8-LABEL: @test_cmpxchg_weak(
|
||||
// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg weak i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
|
||||
//
|
||||
// PPC64-LABEL: @test_cmpxchg_weak(
|
||||
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
|
||||
//
|
||||
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
|
||||
// expected-no-diagnostics
|
||||
return __c11_atomic_compare_exchange_weak(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
// PPC64-PWR8-LABEL: @is_lock_free(
|
||||
// PPC64-PWR8: ret i32 1
|
||||
//
|
||||
// PPC64-LABEL: @is_lock_free(
|
||||
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, i8* noundef null)
|
||||
//
|
||||
int is_lock_free() {
|
||||
AtomicQ q;
|
||||
// expected-no-diagnostics
|
||||
return __c11_atomic_is_lock_free(sizeof(q));
|
||||
}
|
|
@ -9,7 +9,7 @@
|
|||
// RUN: -target-cpu pwr7
|
||||
// RUN: %clang_cc1 %s -verify -fgnuc-version=4.2.1 -ffreestanding \
|
||||
// RUN: -fsyntax-only -triple=powerpc64le-linux-gnu -std=c11 \
|
||||
// RUN: -target-cpu pwr8
|
||||
// RUN: -target-cpu pwr8 -DPPC64_PWR8
|
||||
|
||||
// Basic parsing/Sema tests for __c11_atomic_*
|
||||
|
||||
|
@ -47,7 +47,11 @@ _Static_assert(__c11_atomic_is_lock_free(2), "");
|
|||
_Static_assert(__c11_atomic_is_lock_free(3), ""); // expected-error {{not an integral constant expression}}
|
||||
_Static_assert(__c11_atomic_is_lock_free(4), "");
|
||||
_Static_assert(__c11_atomic_is_lock_free(8), "");
|
||||
#ifndef PPC64_PWR8
|
||||
_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-error {{not an integral constant expression}}
|
||||
#else
|
||||
_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-no-error
|
||||
#endif
|
||||
_Static_assert(__c11_atomic_is_lock_free(17), ""); // expected-error {{not an integral constant expression}}
|
||||
|
||||
_Static_assert(__atomic_is_lock_free(1, 0), "");
|
||||
|
@ -55,15 +59,23 @@ _Static_assert(__atomic_is_lock_free(2, 0), "");
|
|||
_Static_assert(__atomic_is_lock_free(3, 0), ""); // expected-error {{not an integral constant expression}}
|
||||
_Static_assert(__atomic_is_lock_free(4, 0), "");
|
||||
_Static_assert(__atomic_is_lock_free(8, 0), "");
|
||||
#ifndef PPC64_PWR8
|
||||
_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-error {{not an integral constant expression}}
|
||||
#else
|
||||
_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-no-error
|
||||
#endif
|
||||
_Static_assert(__atomic_is_lock_free(17, 0), ""); // expected-error {{not an integral constant expression}}
|
||||
|
||||
_Static_assert(atomic_is_lock_free((atomic_char*)0), "");
|
||||
_Static_assert(atomic_is_lock_free((atomic_short*)0), "");
|
||||
_Static_assert(atomic_is_lock_free((atomic_int*)0), "");
|
||||
_Static_assert(atomic_is_lock_free((atomic_long*)0), "");
|
||||
#ifndef PPC64_PWR8
|
||||
// noi128-error@+1 {{__int128 is not supported on this target}}
|
||||
_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-error {{not an integral constant expression}}
|
||||
#else
|
||||
_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-no-error
|
||||
#endif
|
||||
_Static_assert(atomic_is_lock_free(0 + (atomic_char*)0), "");
|
||||
|
||||
char i8;
|
||||
|
@ -88,7 +100,11 @@ _Static_assert(__atomic_always_lock_free(2, 0), "");
|
|||
_Static_assert(!__atomic_always_lock_free(3, 0), "");
|
||||
_Static_assert(__atomic_always_lock_free(4, 0), "");
|
||||
_Static_assert(__atomic_always_lock_free(8, 0), "");
|
||||
#ifndef PPC64_PWR8
|
||||
_Static_assert(!__atomic_always_lock_free(16, 0), "");
|
||||
#else
|
||||
_Static_assert(__atomic_always_lock_free(16, 0), "");
|
||||
#endif
|
||||
_Static_assert(!__atomic_always_lock_free(17, 0), "");
|
||||
|
||||
_Static_assert(__atomic_always_lock_free(1, incomplete), "");
|
||||
|
|
|
@ -1321,7 +1321,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
|
||||
}
|
||||
|
||||
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
|
||||
if (shouldInlineQuadwordAtomics()) {
|
||||
setMaxAtomicSizeInBitsSupported(128);
|
||||
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
|
||||
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
|
||||
|
@ -18053,10 +18053,18 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns true when 16-byte (quadword) atomic operations should be inlined.
/// All of the following must hold: the subtarget is PPC64, the subtarget has
/// quadword atomics, and either EnableQuadwordAtomics is set or the target OS
/// is not AIX (so on AIX, EnableQuadwordAtomics acts as the opt-in).
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
|
||||
// TODO: 16-byte atomic type support for AIX is in progress; we should be able
|
||||
// to inline 16-byte atomic ops on AIX too in the future.
|
||||
return Subtarget.isPPC64() &&
|
||||
(EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
|
||||
Subtarget.hasQuadwordAtomics();
|
||||
}
|
||||
|
||||
TargetLowering::AtomicExpansionKind
|
||||
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
|
||||
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
|
||||
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
|
||||
if (shouldInlineQuadwordAtomics() && Size == 128)
|
||||
return AtomicExpansionKind::MaskedIntrinsic;
|
||||
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
|
||||
}
|
||||
|
@ -18064,7 +18072,7 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
|
|||
TargetLowering::AtomicExpansionKind
|
||||
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
|
||||
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
|
||||
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
|
||||
if (shouldInlineQuadwordAtomics() && Size == 128)
|
||||
return AtomicExpansionKind::MaskedIntrinsic;
|
||||
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
|
||||
}
|
||||
|
@ -18094,8 +18102,7 @@ getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
|
|||
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
|
||||
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
|
||||
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
|
||||
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
|
||||
"Only support quadword now");
|
||||
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
|
||||
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
|
||||
Type *ValTy = Incr->getType();
|
||||
assert(ValTy->getPrimitiveSizeInBits() == 128);
|
||||
|
@ -18119,8 +18126,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
|
|||
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
|
||||
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
|
||||
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
|
||||
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
|
||||
"Only support quadword now");
|
||||
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
|
||||
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
|
||||
Type *ValTy = CmpVal->getType();
|
||||
assert(ValTy->getPrimitiveSizeInBits() == 128);
|
||||
|
|
|
@ -910,6 +910,8 @@ namespace llvm {
|
|||
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
|
||||
AtomicOrdering Ord) const override;
|
||||
|
||||
bool shouldInlineQuadwordAtomics() const;
|
||||
|
||||
TargetLowering::AtomicExpansionKind
|
||||
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue