[PowerPC] Support 16-byte lock free atomics on pwr8 and up

Make 16-byte atomic types 16-byte aligned on PPC64, consistent with GCC. Also enable inlining of 16-byte atomics on non-AIX PPC64 targets.

Reviewed By: hubert.reinterpretcast

Differential Revision: https://reviews.llvm.org/D122377
Kai Luo 2022-04-08 22:50:45 +00:00
parent 9fdd25848a
commit 549e118e93
8 changed files with 1222 additions and 22 deletions
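
As a rough illustration of the user-visible effect described above (not part of the commit; a minimal sketch assuming a powerpc64le Linux target built with -mcpu=pwr8 or later):

#include <stdatomic.h>
#include <stdio.h>

typedef struct { char x[16]; } Q;   /* 16-byte payload */

int main(void) {
  _Atomic Q q;                      /* 16-byte aligned after this change, as with GCC */
  _Atomic __int128 counter = 0;

  /* On pwr8 and up (non-AIX), these 16-byte operations stay inline and
     lock free, lowering to lqarx/stqcx.-based sequences rather than
     __atomic_* libcalls. */
  atomic_fetch_add(&counter, 1);
  printf("lock free: %d\n", atomic_is_lock_free(&q));  /* prints 1 */
  return 0;
}

With an older CPU (for example -mcpu=pwr7) or on AIX, the same operations are still emitted as __atomic_* libcalls, which is what the new CodeGen test below checks.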


@@ -81,6 +81,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
IsISA3_0 = true;
} else if (Feature == "+isa-v31-instructions") {
IsISA3_1 = true;
} else if (Feature == "+quadword-atomics") {
HasQuadwordAtomics = true;
}
// TODO: Finish this list and add an assert that we've handled them
// all.
@@ -550,6 +552,12 @@ bool PPCTargetInfo::initFeatureMap(
Features["isa-v30-instructions"] =
llvm::StringSwitch<bool>(CPU).Case("pwr9", true).Default(false);
Features["quadword-atomics"] =
getTriple().isArch64Bit() && llvm::StringSwitch<bool>(CPU)
.Case("pwr9", true)
.Case("pwr8", true)
.Default(false);
// Power10 includes all the same features as Power9 plus any features specific
// to the Power10 core.
if (CPU == "pwr10" || CPU == "power10") {
@@ -660,6 +668,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("isa-v207-instructions", IsISA2_07)
.Case("isa-v30-instructions", IsISA3_0)
.Case("isa-v31-instructions", IsISA3_1)
.Case("quadword-atomics", HasQuadwordAtomics)
.Default(false);
}


@@ -78,6 +78,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool IsISA2_07 = false;
bool IsISA3_0 = false;
bool IsISA3_1 = false;
bool HasQuadwordAtomics = false;
protected:
std::string ABI;
@@ -439,8 +440,18 @@ public:
DataLayout += "-S128-v256:256:256-v512:512:512";
resetDataLayout(DataLayout);
// PPC64 supports atomics up to 8 bytes.
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
// Newer PPC64 instruction sets support atomics up to 16 bytes.
MaxAtomicPromoteWidth = 128;
// Baseline PPC64 supports inlining atomics up to 8 bytes.
MaxAtomicInlineWidth = 64;
}
void setMaxAtomicWidth() override {
// For power8 and up, backend is able to inline 16-byte atomic lock free
// code.
// TODO: We should allow AIX to inline quadword atomics in the future.
if (!getTriple().isOSAIX() && hasFeature("quadword-atomics"))
MaxAtomicInlineWidth = 128;
}
BuiltinVaListKind getBuiltinVaListKind() const override {
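
Taken together, the two fields above split the behavior: MaxAtomicPromoteWidth = 128 makes the frontend give 16-byte _Atomic types 16-byte alignment on every PPC64 CPU, while MaxAtomicInlineWidth is only raised to 128 by setMaxAtomicWidth() when the quadword-atomics feature is present and the target is not AIX. A small compile-time sketch of the difference (illustrative only, assuming a powerpc64le Linux target):

typedef struct { char x[16]; } Q16;

/* Promote width: holds on any PPC64 CPU after this change. */
_Static_assert(_Alignof(_Atomic Q16) == 16, "16-byte atomics are 16-byte aligned");

/* Inline width: only guaranteed lock free when targeting pwr8 or newer on a
   non-AIX OS, e.g. -mcpu=pwr8 on Linux. */
#if defined(__linux__) && defined(_ARCH_PWR8)
_Static_assert(__atomic_always_lock_free(16, 0), "quadword atomics are inlined");
#endif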


@@ -1,25 +1,30 @@
// RUN: %clang_cc1 -verify -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
// RUN: %clang_cc1 -Werror -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC,PPC32
// RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC,PPC64
// RUN: %clang_cc1 -verify -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC,PPC64
// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s \
// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,PPC64
// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC,AIX64
// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s \
// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,AIX64
// PPC: @c = global i8 0, align 1{{$}}
_Atomic(char) c; // expected-no-diagnostics
_Atomic(char) c;
// PPC: @s = global i16 0, align 2{{$}}
_Atomic(short) s; // expected-no-diagnostics
_Atomic(short) s;
// PPC: @i = global i32 0, align 4{{$}}
_Atomic(int) i; // expected-no-diagnostics
_Atomic(int) i;
// PPC32: @l = global i32 0, align 4{{$}}
// PPC64: @l = global i64 0, align 8{{$}}
_Atomic(long) l; // expected-no-diagnostics
// AIX64: @l = global i64 0, align 8{{$}}
_Atomic(long) l;
// PPC: @ll = global i64 0, align 8{{$}}
_Atomic(long long) ll; // expected-no-diagnostics
_Atomic(long long) ll;
typedef struct {
char x[8];
@@ -27,11 +32,14 @@ typedef struct {
// PPC32: @o = global %struct.O zeroinitializer, align 1{{$}}
// PPC64: @o = global %struct.O zeroinitializer, align 8{{$}}
_Atomic(O) o; // expected-no-diagnostics
// AIX64: @o = global %struct.O zeroinitializer, align 8{{$}}
_Atomic(O) o;
typedef struct {
char x[16];
} Q;
// PPC: @q = global %struct.Q zeroinitializer, align 1{{$}}
_Atomic(Q) q; // expected-no-diagnostics
// PPC32: @q = global %struct.Q zeroinitializer, align 1{{$}}
// PPC64: @q = global %struct.Q zeroinitializer, align 16{{$}}
// AIX64: @q = global %struct.Q zeroinitializer, align 16{{$}}
_Atomic(Q) q;


@@ -0,0 +1,92 @@
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-PWR8
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
typedef struct {
char x[16];
} Q;
typedef _Atomic(Q) AtomicQ;
typedef __int128_t int128_t;
// PPC64-PWR8-LABEL: @test_load(
// PPC64-PWR8: [[TMP3:%.*]] = load atomic i128, i128* [[TMP1:%.*]] acquire, align 16
//
// PPC64-LABEL: @test_load(
// PPC64: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP3:%.*]], i8* noundef [[TMP4:%.*]], i32 noundef signext 2)
//
Q test_load(AtomicQ *ptr) {
// expected-no-diagnostics
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
}
// PPC64-PWR8-LABEL: @test_store(
// PPC64-PWR8: store atomic i128 [[TMP6:%.*]], i128* [[TMP4:%.*]] release, align 16
//
// PPC64-LABEL: @test_store(
// PPC64: call void @__atomic_store(i64 noundef 16, i8* noundef [[TMP6:%.*]], i8* noundef [[TMP7:%.*]], i32 noundef signext 3)
//
void test_store(Q val, AtomicQ *ptr) {
// expected-no-diagnostics
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
}
// PPC64-PWR8-LABEL: @test_add(
// PPC64-PWR8: [[TMP3:%.*]] = atomicrmw add i128* [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
//
// PPC64-LABEL: @test_add(
// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(i8* noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
//
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
// expected-no-diagnostics
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
}
// PPC64-PWR8-LABEL: @test_xchg(
// PPC64-PWR8: [[TMP8:%.*]] = atomicrmw xchg i128* [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
//
// PPC64-LABEL: @test_xchg(
// PPC64: call void @__atomic_exchange(i64 noundef 16, i8* noundef [[TMP7:%.*]], i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i32 noundef signext 5)
//
Q test_xchg(AtomicQ *ptr, Q new) {
// expected-no-diagnostics
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
}
// PPC64-PWR8-LABEL: @test_cmpxchg(
// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
// PPC64-LABEL: @test_cmpxchg(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
//
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}
// PPC64-PWR8-LABEL: @test_cmpxchg_weak(
// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg weak i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
// PPC64-LABEL: @test_cmpxchg_weak(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
//
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_weak(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}
// PPC64-PWR8-LABEL: @is_lock_free(
// PPC64-PWR8: ret i32 1
//
// PPC64-LABEL: @is_lock_free(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, i8* noundef null)
//
int is_lock_free() {
AtomicQ q;
// expected-no-diagnostics
return __c11_atomic_is_lock_free(sizeof(q));
}


@@ -9,7 +9,7 @@
// RUN: -target-cpu pwr7
// RUN: %clang_cc1 %s -verify -fgnuc-version=4.2.1 -ffreestanding \
// RUN: -fsyntax-only -triple=powerpc64le-linux-gnu -std=c11 \
// RUN: -target-cpu pwr8
// RUN: -target-cpu pwr8 -DPPC64_PWR8
// Basic parsing/Sema tests for __c11_atomic_*
@@ -47,7 +47,11 @@ _Static_assert(__c11_atomic_is_lock_free(2), "");
_Static_assert(__c11_atomic_is_lock_free(3), ""); // expected-error {{not an integral constant expression}}
_Static_assert(__c11_atomic_is_lock_free(4), "");
_Static_assert(__c11_atomic_is_lock_free(8), "");
#ifndef PPC64_PWR8
_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-error {{not an integral constant expression}}
#else
_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-no-error
#endif
_Static_assert(__c11_atomic_is_lock_free(17), ""); // expected-error {{not an integral constant expression}}
_Static_assert(__atomic_is_lock_free(1, 0), "");
@@ -55,15 +59,23 @@ _Static_assert(__atomic_is_lock_free(2, 0), "");
_Static_assert(__atomic_is_lock_free(3, 0), ""); // expected-error {{not an integral constant expression}}
_Static_assert(__atomic_is_lock_free(4, 0), "");
_Static_assert(__atomic_is_lock_free(8, 0), "");
#ifndef PPC64_PWR8
_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-error {{not an integral constant expression}}
#else
_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-no-error
#endif
_Static_assert(__atomic_is_lock_free(17, 0), ""); // expected-error {{not an integral constant expression}}
_Static_assert(atomic_is_lock_free((atomic_char*)0), "");
_Static_assert(atomic_is_lock_free((atomic_short*)0), "");
_Static_assert(atomic_is_lock_free((atomic_int*)0), "");
_Static_assert(atomic_is_lock_free((atomic_long*)0), "");
#ifndef PPC64_PWR8
// noi128-error@+1 {{__int128 is not supported on this target}}
_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-error {{not an integral constant expression}}
#else
_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-no-error
#endif
_Static_assert(atomic_is_lock_free(0 + (atomic_char*)0), "");
char i8;
@@ -88,7 +100,11 @@ _Static_assert(__atomic_always_lock_free(2, 0), "");
_Static_assert(!__atomic_always_lock_free(3, 0), "");
_Static_assert(__atomic_always_lock_free(4, 0), "");
_Static_assert(__atomic_always_lock_free(8, 0), "");
#ifndef PPC64_PWR8
_Static_assert(!__atomic_always_lock_free(16, 0), "");
#else
_Static_assert(__atomic_always_lock_free(16, 0), "");
#endif
_Static_assert(!__atomic_always_lock_free(17, 0), "");
_Static_assert(__atomic_always_lock_free(1, incomplete), "");


@@ -1321,7 +1321,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
if (shouldInlineQuadwordAtomics()) {
setMaxAtomicSizeInBitsSupported(128);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -18053,10 +18053,18 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
}
}
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
// TODO: 16-byte atomic type support for AIX is in progress; we should be able
// to inline 16-byte atomic ops on AIX too in the future.
return Subtarget.isPPC64() &&
(EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
Subtarget.hasQuadwordAtomics();
}
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
}
@@ -18064,7 +18072,7 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}
@@ -18094,8 +18102,7 @@ getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
"Only support quadword now");
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = Incr->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
@@ -18119,8 +18126,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
"Only support quadword now");
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = CmpVal->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);


@@ -910,6 +910,8 @@ namespace llvm {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
bool shouldInlineQuadwordAtomics() const;
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

File diff suppressed because it is too large.