[AArch64][GlobalISel] CodeGen for Armv8.8/9.3 MOPS

This implements codegen for the Armv8.8/9.3 Memory Operations extension
(MOPS). Any memcpy/memset/memmove intrinsics will always be emitted as
a series of three consecutive instructions P, M and E (prologue, main
and epilogue) which together perform the operation. The SelectionDAG
implementation is split into a separate patch.
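
A condensed sketch of the pseudo-to-triple mapping (mirroring the
AArch64AsmPrinter::LowerMOPS lowering below; the helper name
getMOPSTriple is illustrative only, not part of the patch):

static std::array<unsigned, 3> getMOPSTriple(unsigned Opcode) {
  switch (Opcode) {
  case AArch64::MOPSMemoryCopyPseudo:       // memcpy (forward-only copy)
    return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE};
  case AArch64::MOPSMemoryMovePseudo:       // memmove (overlap allowed)
    return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE};
  case AArch64::MOPSMemorySetPseudo:        // memset
    return {AArch64::SETP, AArch64::SETM, AArch64::SETE};
  case AArch64::MOPSMemorySetTaggingPseudo: // memset + MTE tag setting
    return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE};
  default:
    llvm_unreachable("Unhandled memory operation pseudo");
  }
}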

AArch64LegalizerInfo will now consider the following generic opcodes
when +mops is available, instead of legalising them by expansion to
libcalls: G_BZERO, G_MEMCPY_INLINE, G_MEMCPY, G_MEMMOVE and G_MEMSET.
The s8 value of memset is legalised to s64 to match the pseudos.
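
In rule form, the +mops path amounts to the following (a condensed
excerpt of the AArch64LegalizerInfo changes below; p0/s8/s64 are the
usual LLT shorthands):

if (ST.hasMOPS()) {
  // (p0 dst, s64 val, s64 size) is directly selectable; an s8 set value
  // is custom-lowered, where it is any-extended to s64 (see legalizeMemOps).
  getActionDefinitionsBuilder(G_MEMSET)
      .legalForCartesianProduct({p0}, {s64}, {s64})
      .customForCartesianProduct({p0}, {s8}, {s64})
      .immIdx(0); // Inform verifier imm idx 0 (tail call) is handled.
  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
      .legalForCartesianProduct({p0}, {p0}, {s64})
      .immIdx(0);
  // G_MEMCPY_INLINE has no tail-call immediate.
  getActionDefinitionsBuilder(G_MEMCPY_INLINE)
      .legalForCartesianProduct({p0}, {p0}, {s64});
  // G_BZERO is only formed by the combiner; with +mops it is unsupported.
  getActionDefinitionsBuilder(G_BZERO).unsupported();
}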

AArch64O0PreLegalizerCombinerInfo will still be able to combine
G_MEMCPY_INLINE even if +mops is present, as it is unclear whether it is
better to generate fixed-length copies or MOPS instructions for the
inline code of small or zero-sized memory operations, so we choose to be
conservative for now.

AArch64InstructionSelector will select the above as the new pseudo
instructions AArch64::MOPSMemory{Copy/Move/Set/SetTagging}Pseudo. These
are each expanded into a series of three instructions (e.g.
SETP/SETM/SETE) which must be emitted together during code emission to
avoid scheduler reordering.
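
For example, a tagged memset with a runtime size (memset_tagged_size in
the test below) is emitted as the triple

setgp [x0]!, x1!, x2
setgm [x0]!, x1!, x2
setge [x0]!, x1!, x2

with the destination and size registers updated in place, which is why
the three instructions must stay adjacent.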

This is part 3/4 of a series of patches split from
https://reviews.llvm.org/D117405 to facilitate reviewing.

Patch by Tomas Matheson and Son Tuan Vu

Differential Revision: https://reviews.llvm.org/D117763
tyb0807 2022-01-18 22:34:48 +00:00
parent 6ec9fd20bd
commit 78fd413cf7
10 changed files with 1592 additions and 12 deletions

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

@@ -432,16 +432,6 @@ class LegalizeRuleSet {
return TypeIdx;
}
unsigned immIdx(unsigned ImmIdx) {
assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
MCOI::OPERAND_FIRST_GENERIC_IMM) &&
"Imm Index is out of bounds");
#ifndef NDEBUG
ImmIdxsCovered.set(ImmIdx);
#endif
return ImmIdx;
}
void markAllIdxsAsCovered() {
#ifndef NDEBUG
TypeIdxsCovered.set();
@@ -568,6 +558,16 @@ public:
}
unsigned getAlias() const { return AliasOf; }
unsigned immIdx(unsigned ImmIdx) {
assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
MCOI::OPERAND_FIRST_GENERIC_IMM) &&
"Imm Index is out of bounds");
#ifndef NDEBUG
ImmIdxsCovered.set(ImmIdx);
#endif
return ImmIdx;
}
/// The instruction is legal if predicate is true.
LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that the free-form
@@ -824,11 +824,22 @@ public:
LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) {
return actionForCartesianProduct(LegalizeAction::Custom, Types);
}
/// The instruction is custom when type indexes 0 and 1 are both in their
/// respective lists.
LegalizeRuleSet &
customForCartesianProduct(std::initializer_list<LLT> Types0,
std::initializer_list<LLT> Types1) {
return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1);
}
/// The instruction is custom when type indexes 0, 1, and 2 are all in
/// their respective lists.
LegalizeRuleSet &
customForCartesianProduct(std::initializer_list<LLT> Types0,
std::initializer_list<LLT> Types1,
std::initializer_list<LLT> Types2) {
return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1,
Types2);
}
/// Unconditionally custom lower.
LegalizeRuleSet &custom() {

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

@@ -95,6 +95,8 @@ public:
void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -936,6 +938,43 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
.addImm(Size == 4 ? 0 : 2));
}
void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
const llvm::MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
assert(STI->hasMOPS());
assert(STI->hasMTE() || Opcode != AArch64::MOPSMemorySetTaggingPseudo);
const auto Ops = [Opcode]() -> std::array<unsigned, 3> {
if (Opcode == AArch64::MOPSMemoryCopyPseudo)
return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE};
if (Opcode == AArch64::MOPSMemoryMovePseudo)
return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE};
if (Opcode == AArch64::MOPSMemorySetPseudo)
return {AArch64::SETP, AArch64::SETM, AArch64::SETE};
if (Opcode == AArch64::MOPSMemorySetTaggingPseudo)
return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE};
llvm_unreachable("Unhandled memory operation pseudo");
}();
const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo ||
Opcode == AArch64::MOPSMemorySetTaggingPseudo;
for (auto Op : Ops) {
int i = 0;
auto MCIB = MCInstBuilder(Op);
// Destination registers
MCIB.addReg(MI.getOperand(i++).getReg());
MCIB.addReg(MI.getOperand(i++).getReg());
if (!IsSet)
MCIB.addReg(MI.getOperand(i++).getReg());
// Input registers
MCIB.addReg(MI.getOperand(i++).getReg());
MCIB.addReg(MI.getOperand(i++).getReg());
MCIB.addReg(MI.getOperand(i++).getReg());
EmitToStreamer(OutStreamer, MCIB);
}
}
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -1363,6 +1402,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
emitFMov0(*MI);
return;
case AArch64::MOPSMemoryCopyPseudo:
case AArch64::MOPSMemoryMovePseudo:
case AArch64::MOPSMemorySetPseudo:
case AArch64::MOPSMemorySetTaggingPseudo:
LowerMOPS(*OutStreamer, *MI);
return;
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*OutStreamer, SM, *MI);

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

@@ -130,6 +130,10 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case AArch64::JumpTableDest32:
case AArch64::JumpTableDest16:
case AArch64::JumpTableDest8:
case AArch64::MOPSMemoryCopyPseudo:
case AArch64::MOPSMemoryMovePseudo:
case AArch64::MOPSMemorySetPseudo:
case AArch64::MOPSMemorySetTaggingPseudo:
NumBytes = 12;
break;
case AArch64::SPACE:

llvm/lib/Target/AArch64/AArch64InstrInfo.td

@@ -8362,6 +8362,27 @@ let Predicates = [HasMOPS, HasMTE] in {
}
}
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
let mayLoad = 1 in {
def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
[], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
[], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
}
let mayLoad = 0 in {
def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
[], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
[], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

@@ -192,6 +192,7 @@ private:
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
@@ -3424,6 +3425,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_ADD:
return selectReduction(I, MRI);
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMCPY_INLINE:
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET:
assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
return selectMOPS(I, MRI);
}
return false;
@@ -3481,6 +3488,64 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
return false;
}
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
MachineRegisterInfo &MRI) {
unsigned Mopcode;
switch (GI.getOpcode()) {
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMCPY_INLINE:
Mopcode = AArch64::MOPSMemoryCopyPseudo;
break;
case TargetOpcode::G_MEMMOVE:
Mopcode = AArch64::MOPSMemoryMovePseudo;
break;
case TargetOpcode::G_MEMSET:
// For tagged memset see llvm.aarch64.mops.memset.tag
Mopcode = AArch64::MOPSMemorySetPseudo;
break;
}
auto &DstPtr = GI.getOperand(0);
auto &SrcOrVal = GI.getOperand(1);
auto &Size = GI.getOperand(2);
// Create copies of the registers that can be clobbered.
const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
const auto &SrcValRegClass =
IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
// Constrain to specific registers
RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
MIB.buildCopy(DstPtrCopy, DstPtr);
MIB.buildCopy(SrcValCopy, SrcOrVal);
MIB.buildCopy(SizeCopy, Size);
// New instruction uses the copied registers because it must update them.
// The defs are not used since they don't exist in G_MEM*. They are still
// tied.
// Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
if (IsSet) {
MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
{DstPtrCopy, SizeCopy, SrcValCopy});
} else {
Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
{DstPtrCopy, SrcValCopy, SizeCopy});
}
GI.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
@@ -5375,6 +5440,36 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
break;
}
case Intrinsic::aarch64_mops_memset_tag: {
// Transform
//   %dst:gpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS
//       intrinsic(@llvm.aarch64.mops.memset.tag),
//       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
// where %dst is updated, into
//   %Rd:GPR64common, %Rn:GPR64 = MOPSMemorySetTaggingPseudo
//       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
// where Rd and Rn are tied.
// It is expected that %val has been extended to s64 in legalization.
// Note that the order of the size/value operands is swapped.
Register DstDef = I.getOperand(0).getReg();
// I.getOperand(1) is the intrinsic function
Register DstUse = I.getOperand(2).getReg();
Register ValUse = I.getOperand(3).getReg();
Register SizeUse = I.getOperand(4).getReg();
// MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
// Therefore an additional virtual register is required for the updated size
// operand. This value is not accessible via the semantics of the intrinsic.
Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
{DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
Memset.cloneMemRefs(I);
constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
break;
}
}
I.eraseFromParent();

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

@@ -699,8 +699,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
.libcall();
if (ST.hasMOPS()) {
// G_BZERO is not supported. Currently it is only emitted by
// the PreLegalizerCombiner for G_MEMSET with a zero constant.
getActionDefinitionsBuilder(G_BZERO).unsupported();
getActionDefinitionsBuilder(G_MEMSET)
.legalForCartesianProduct({p0}, {s64}, {s64})
.customForCartesianProduct({p0}, {s8}, {s64})
.immIdx(0); // Inform verifier imm idx 0 is handled.
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
.legalForCartesianProduct({p0}, {p0}, {s64})
.immIdx(0); // Inform verifier imm idx 0 is handled.
// G_MEMCPY_INLINE does not have a tailcall immediate
getActionDefinitionsBuilder(G_MEMCPY_INLINE)
.legalForCartesianProduct({p0}, {p0}, {s64});
} else {
getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
.libcall();
}
// FIXME: Legal types are only legal with NEON.
getActionDefinitionsBuilder(G_ABS)
@@ -832,6 +852,11 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeAtomicCmpxchg128(MI, MRI, Helper);
case TargetOpcode::G_CTTZ:
return legalizeCTTZ(MI, Helper);
case TargetOpcode::G_BZERO:
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET:
return legalizeMemOps(MI, Helper);
}
llvm_unreachable("expected switch to return");
@@ -989,6 +1014,15 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
case Intrinsic::aarch64_mops_memset_tag: {
assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
// Anyext the value to 64 bits
MachineIRBuilder MIB(MI);
auto &Value = MI.getOperand(3);
Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
Value.setReg(ZExtValueReg);
return true;
}
}
return true;
@@ -1359,3 +1393,20 @@ bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
LegalizerHelper &Helper) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
// The tagged version (llvm.aarch64.mops.memset.tag) is legalised in legalizeIntrinsic
if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
// Anyext the value operand to 64 bits
auto &Value = MI.getOperand(1);
Register ZExtValueReg =
MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
Value.setReg(ZExtValueReg);
return true;
}
return false;
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

@@ -56,6 +56,7 @@ private:
bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.


@@ -0,0 +1,243 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel-O0
; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel
; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly
declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64)
define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_0_zeroval:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: mov x8, xzr
; GISel-O0-NEXT: setgp [x0]!, x8!, x8
; GISel-O0-NEXT: setgm [x0]!, x8!, x8
; GISel-O0-NEXT: setge [x0]!, x8!, x8
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_0_zeroval:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov x8, xzr
; GISel-NEXT: setgp [x0]!, x8!, xzr
; GISel-NEXT: setgm [x0]!, x8!, xzr
; GISel-NEXT: setge [x0]!, x8!, xzr
; GISel-NEXT: ret
entry:
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 0)
ret i8* %r
}
define i8* @memset_tagged_1_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_1_zeroval:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: mov x9, xzr
; GISel-O0-NEXT: mov w8, #1
; GISel-O0-NEXT: // kill: def $x8 killed $w8
; GISel-O0-NEXT: setgp [x0]!, x8!, x9
; GISel-O0-NEXT: setgm [x0]!, x8!, x9
; GISel-O0-NEXT: setge [x0]!, x8!, x9
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_1_zeroval:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov w8, #1
; GISel-NEXT: setgp [x0]!, x8!, xzr
; GISel-NEXT: setgm [x0]!, x8!, xzr
; GISel-NEXT: setge [x0]!, x8!, xzr
; GISel-NEXT: ret
entry:
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 1)
ret i8* %r
}
define i8* @memset_tagged_10_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_10_zeroval:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: mov x9, xzr
; GISel-O0-NEXT: mov w8, #10
; GISel-O0-NEXT: // kill: def $x8 killed $w8
; GISel-O0-NEXT: setgp [x0]!, x8!, x9
; GISel-O0-NEXT: setgm [x0]!, x8!, x9
; GISel-O0-NEXT: setge [x0]!, x8!, x9
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_10_zeroval:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov w8, #10
; GISel-NEXT: setgp [x0]!, x8!, xzr
; GISel-NEXT: setgm [x0]!, x8!, xzr
; GISel-NEXT: setge [x0]!, x8!, xzr
; GISel-NEXT: ret
entry:
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10)
ret i8* %r
}
define i8* @memset_tagged_10000_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_10000_zeroval:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: mov x9, xzr
; GISel-O0-NEXT: mov w8, #10000
; GISel-O0-NEXT: // kill: def $x8 killed $w8
; GISel-O0-NEXT: setgp [x0]!, x8!, x9
; GISel-O0-NEXT: setgm [x0]!, x8!, x9
; GISel-O0-NEXT: setge [x0]!, x8!, x9
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_10000_zeroval:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov w8, #10000
; GISel-NEXT: setgp [x0]!, x8!, xzr
; GISel-NEXT: setgm [x0]!, x8!, xzr
; GISel-NEXT: setge [x0]!, x8!, xzr
; GISel-NEXT: ret
entry:
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10000)
ret i8* %r
}
define i8* @memset_tagged_size_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_size_zeroval:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: mov x8, xzr
; GISel-O0-NEXT: setgp [x0]!, x1!, x8
; GISel-O0-NEXT: setgm [x0]!, x1!, x8
; GISel-O0-NEXT: setge [x0]!, x1!, x8
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_size_zeroval:
; GISel: // %bb.0: // %entry
; GISel-NEXT: setgp [x0]!, x1!, xzr
; GISel-NEXT: setgm [x0]!, x1!, xzr
; GISel-NEXT: setge [x0]!, x1!, xzr
; GISel-NEXT: ret
entry:
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 %size)
ret i8* %r
}
define i8* @memset_tagged_0(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_0:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: // implicit-def: $x9
; GISel-O0-NEXT: mov w9, w2
; GISel-O0-NEXT: mov x8, xzr
; GISel-O0-NEXT: setgp [x0]!, x8!, x9
; GISel-O0-NEXT: setgm [x0]!, x8!, x9
; GISel-O0-NEXT: setge [x0]!, x8!, x9
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_0:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov x8, xzr
; GISel-NEXT: // kill: def $w2 killed $w2 def $x2
; GISel-NEXT: setgp [x0]!, x8!, x2
; GISel-NEXT: setgm [x0]!, x8!, x2
; GISel-NEXT: setge [x0]!, x8!, x2
; GISel-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 0)
ret i8* %r
}
define i8* @memset_tagged_1(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_1:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: // implicit-def: $x9
; GISel-O0-NEXT: mov w9, w2
; GISel-O0-NEXT: mov w8, #1
; GISel-O0-NEXT: // kill: def $x8 killed $w8
; GISel-O0-NEXT: setgp [x0]!, x8!, x9
; GISel-O0-NEXT: setgm [x0]!, x8!, x9
; GISel-O0-NEXT: setge [x0]!, x8!, x9
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_1:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov w8, #1
; GISel-NEXT: // kill: def $w2 killed $w2 def $x2
; GISel-NEXT: setgp [x0]!, x8!, x2
; GISel-NEXT: setgm [x0]!, x8!, x2
; GISel-NEXT: setge [x0]!, x8!, x2
; GISel-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 1)
ret i8* %r
}
define i8* @memset_tagged_10(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_10:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: // implicit-def: $x9
; GISel-O0-NEXT: mov w9, w2
; GISel-O0-NEXT: mov w8, #10
; GISel-O0-NEXT: // kill: def $x8 killed $w8
; GISel-O0-NEXT: setgp [x0]!, x8!, x9
; GISel-O0-NEXT: setgm [x0]!, x8!, x9
; GISel-O0-NEXT: setge [x0]!, x8!, x9
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_10:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov w8, #10
; GISel-NEXT: // kill: def $w2 killed $w2 def $x2
; GISel-NEXT: setgp [x0]!, x8!, x2
; GISel-NEXT: setgm [x0]!, x8!, x2
; GISel-NEXT: setge [x0]!, x8!, x2
; GISel-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10)
ret i8* %r
}
define i8* @memset_tagged_10000(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_10000:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: // implicit-def: $x9
; GISel-O0-NEXT: mov w9, w2
; GISel-O0-NEXT: mov w8, #10000
; GISel-O0-NEXT: // kill: def $x8 killed $w8
; GISel-O0-NEXT: setgp [x0]!, x8!, x9
; GISel-O0-NEXT: setgm [x0]!, x8!, x9
; GISel-O0-NEXT: setge [x0]!, x8!, x9
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_10000:
; GISel: // %bb.0: // %entry
; GISel-NEXT: mov w8, #10000
; GISel-NEXT: // kill: def $w2 killed $w2 def $x2
; GISel-NEXT: setgp [x0]!, x8!, x2
; GISel-NEXT: setgm [x0]!, x8!, x2
; GISel-NEXT: setge [x0]!, x8!, x2
; GISel-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10000)
ret i8* %r
}
define i8* @memset_tagged_size(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_size:
; GISel-O0: // %bb.0: // %entry
; GISel-O0-NEXT: // implicit-def: $x8
; GISel-O0-NEXT: mov w8, w2
; GISel-O0-NEXT: setgp [x0]!, x1!, x8
; GISel-O0-NEXT: setgm [x0]!, x1!, x8
; GISel-O0-NEXT: setge [x0]!, x1!, x8
; GISel-O0-NEXT: ret
;
; GISel-LABEL: memset_tagged_size:
; GISel: // %bb.0: // %entry
; GISel-NEXT: // kill: def $w2 killed $w2 def $x2
; GISel-NEXT: setgp [x0]!, x1!, x2
; GISel-NEXT: setgm [x0]!, x1!, x2
; GISel-NEXT: setge [x0]!, x1!, x2
; GISel-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
%r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 %size)
ret i8* %r
}

File diff suppressed because it is too large.

llvm/unittests/Target/AArch64/InstSizes.cpp

@@ -155,3 +155,17 @@ TEST(InstSizes, TLSDESC_CALLSEQ) {
EXPECT_EQ(16u, II.getInstSizeInBytes(*I));
});
}
TEST(InstSizes, MOPSMemorySetTaggingPseudo) {
std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine();
std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
runChecks(TM.get(), II.get(), "",
" renamable $x0, dead renamable $x1 = MOPSMemorySetTaggingPseudo "
"killed renamable $x0, killed renamable $x1, killed renamable $x2, "
"implicit-def dead $nzcv\n",
[](AArch64InstrInfo &II, MachineFunction &MF) {
auto I = MF.begin()->begin();
EXPECT_EQ(12u, II.getInstSizeInBytes(*I));
});
}