[AArch64][GlobalISel] CodeGen for Armv8.8/9.3 MOPS
This implements codegen for the Armv8.8/9.3 Memory Operations extension (MOPS). Any memcpy/memset/memmove intrinsics will always be emitted as a series of three consecutive instructions P, M and E which perform the operation. The SelectionDAG implementation is split into a separate patch.

AArch64LegalizerInfo will now consider the following generic opcodes if +mops is available, instead of legalising them by expanding to libcalls: G_BZERO, G_MEMCPY_INLINE, G_MEMCPY, G_MEMMOVE, G_MEMSET. The s8 value of memset is legalised to s64 to match the pseudos.

AArch64O0PreLegalizerCombinerInfo will still be able to combine G_MEMCPY_INLINE even if +mops is present, as it is unclear whether it is better to generate fixed-length copies or MOPS instructions for the inline code of small or zero-sized memory operations, so we choose to be conservative for now.

AArch64InstructionSelector will select the above as the new pseudo instructions AArch64::MOPSMemory{Copy/Move/Set/SetTagging}. These are each expanded to a series of three instructions (e.g. SETP/SETM/SETE) which must be emitted together during code emission to avoid scheduler reordering.

This is part 3/4 of a series of patches split from https://reviews.llvm.org/D117405 to facilitate reviewing.

Patch by Tomas Matheson and Son Tuan Vu

Differential Revision: https://reviews.llvm.org/D117763
parent 6ec9fd20bd
commit 78fd413cf7
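For illustration, the shape of the lowering for a plain (non-tagged) memcpy under +mops looks like the sketch below. This is hand-written in the style of the commit's tests and is not one of its checked-in FileCheck files; the function name and register choices are illustrative only.

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

define void @memcpy_mops_sketch(i8* %dst, i8* %src, i64 %n) {
entry:
  ; Expected to select to the CPYF prologue/main/epilogue triple, roughly:
  ;   cpyfp [x0]!, [x1]!, x2!
  ;   cpyfm [x0]!, [x1]!, x2!
  ;   cpyfe [x0]!, [x1]!, x2!
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
  ret void
}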
llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -432,16 +432,6 @@ class LegalizeRuleSet {
    return TypeIdx;
  }

  unsigned immIdx(unsigned ImmIdx) {
    assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
                      MCOI::OPERAND_FIRST_GENERIC_IMM) &&
           "Imm Index is out of bounds");
#ifndef NDEBUG
    ImmIdxsCovered.set(ImmIdx);
#endif
    return ImmIdx;
  }

  void markAllIdxsAsCovered() {
#ifndef NDEBUG
    TypeIdxsCovered.set();
@@ -568,6 +558,16 @@ public:
  }
  unsigned getAlias() const { return AliasOf; }

  unsigned immIdx(unsigned ImmIdx) {
    assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
                      MCOI::OPERAND_FIRST_GENERIC_IMM) &&
           "Imm Index is out of bounds");
#ifndef NDEBUG
    ImmIdxsCovered.set(ImmIdx);
#endif
    return ImmIdx;
  }

  /// The instruction is legal if predicate is true.
  LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
    // We have no choice but conservatively assume that the free-form
@@ -824,11 +824,22 @@ public:
  LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) {
    return actionForCartesianProduct(LegalizeAction::Custom, Types);
  }
  /// The instruction is custom when type indexes 0 and 1 are both in their
  /// respective lists.
  LegalizeRuleSet &
  customForCartesianProduct(std::initializer_list<LLT> Types0,
                            std::initializer_list<LLT> Types1) {
    return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1);
  }
  /// The instruction is custom when type indexes 0, 1, and 2 are all in
  /// their respective lists.
  LegalizeRuleSet &
  customForCartesianProduct(std::initializer_list<LLT> Types0,
                            std::initializer_list<LLT> Types1,
                            std::initializer_list<LLT> Types2) {
    return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1,
                                     Types2);
  }

  /// Unconditionally custom lower.
  LegalizeRuleSet &custom() {
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -95,6 +95,8 @@ public:
  void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);

  void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);

  void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                     const MachineInstr &MI);
  void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -936,6 +938,43 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
                                  .addImm(Size == 4 ? 0 : 2));
}

void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
                                  const llvm::MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  assert(STI->hasMOPS());
  assert(STI->hasMTE() || Opcode != AArch64::MOPSMemorySetTaggingPseudo);

  const auto Ops = [Opcode]() -> std::array<unsigned, 3> {
    if (Opcode == AArch64::MOPSMemoryCopyPseudo)
      return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE};
    if (Opcode == AArch64::MOPSMemoryMovePseudo)
      return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE};
    if (Opcode == AArch64::MOPSMemorySetPseudo)
      return {AArch64::SETP, AArch64::SETM, AArch64::SETE};
    if (Opcode == AArch64::MOPSMemorySetTaggingPseudo)
      return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE};
    llvm_unreachable("Unhandled memory operation pseudo");
  }();
  const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo ||
                     Opcode == AArch64::MOPSMemorySetTaggingPseudo;

  for (auto Op : Ops) {
    int i = 0;
    auto MCIB = MCInstBuilder(Op);
    // Destination registers
    MCIB.addReg(MI.getOperand(i++).getReg());
    MCIB.addReg(MI.getOperand(i++).getReg());
    if (!IsSet)
      MCIB.addReg(MI.getOperand(i++).getReg());
    // Input registers
    MCIB.addReg(MI.getOperand(i++).getReg());
    MCIB.addReg(MI.getOperand(i++).getReg());
    MCIB.addReg(MI.getOperand(i++).getReg());

    EmitToStreamer(OutStreamer, MCIB);
  }
}

void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                                      const MachineInstr &MI) {
  unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
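LowerMOPS above emits the three MCInsts back to back with no scheduling in between, so a plain memset pseudo becomes a SETP/SETM/SETE triple. A minimal sketch, assuming -mattr=+mops and an illustrative register assignment (hypothetical function name, not from this commit's tests):

declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)

define void @memset_zero_mops_sketch(i8* %dst, i64 %n) {
entry:
  ; MOPSMemorySetPseudo is expected to expand to, roughly:
  ;   setp [x0]!, x1!, xzr
  ;   setm [x0]!, x1!, xzr
  ;   sete [x0]!, x1!, xzr
  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %n, i1 false)
  ret void
}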
@@ -1363,6 +1402,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
    emitFMov0(*MI);
    return;

  case AArch64::MOPSMemoryCopyPseudo:
  case AArch64::MOPSMemoryMovePseudo:
  case AArch64::MOPSMemorySetPseudo:
  case AArch64::MOPSMemorySetTaggingPseudo:
    LowerMOPS(*OutStreamer, *MI);
    return;

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*OutStreamer, SM, *MI);
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -130,6 +130,10 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
  case AArch64::MOPSMemoryCopyPseudo:
  case AArch64::MOPSMemoryMovePseudo:
  case AArch64::MOPSMemorySetPseudo:
  case AArch64::MOPSMemorySetTaggingPseudo:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8362,6 +8362,27 @@ let Predicates = [HasMOPS, HasMTE] in {
  }
}

let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                     (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                     [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}

let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -192,6 +192,7 @@ private:
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
@@ -3424,6 +3425,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_ADD:
    return selectReduction(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
  }

  return false;
@@ -3481,6 +3488,64 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
  return false;
}

bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
                                            MachineRegisterInfo &MRI) {
  unsigned Mopcode;
  switch (GI.getOpcode()) {
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
    break;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
    break;
  case TargetOpcode::G_MEMSET:
    // For tagged memset see llvm.aarch64.mops.memset.tag
    Mopcode = AArch64::MOPSMemorySetPseudo;
    break;
  }

  auto &DstPtr = GI.getOperand(0);
  auto &SrcOrVal = GI.getOperand(1);
  auto &Size = GI.getOperand(2);

  // Create copies of the registers that can be clobbered.
  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());

  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;

  // Constrain to specific registers
  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);

  MIB.buildCopy(DstPtrCopy, DstPtr);
  MIB.buildCopy(SrcValCopy, SrcOrVal);
  MIB.buildCopy(SizeCopy, Size);

  // New instruction uses the copied registers because it must update them.
  // The defs are not used since they don't exist in G_MEM*. They are still
  // tied.
  // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  if (IsSet) {
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
  } else {
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  }

  GI.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
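For G_MEMMOVE, selectMOPS above picks MOPSMemoryMovePseudo, which LowerMOPS later expands to the CPYP/CPYM/CPYE triple (the non-forward-only variant of CPYF). A minimal hand-written sketch under the same assumptions as the earlier examples (illustrative function name and registers, not from this commit's tests):

declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)

define void @memmove_mops_sketch(i8* %dst, i8* %src, i64 %n) {
entry:
  ; Expected selection via MOPSMemoryMovePseudo, roughly:
  ;   cpyp [x0]!, [x1]!, x2!
  ;   cpym [x0]!, [x1]!, x2!
  ;   cpye [x0]!, [x1]!, x2!
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
  ret void
}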
@@ -5375,6 +5440,36 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
    break;
  }
  case Intrinsic::aarch64_mops_memset_tag: {
    // Transform
    //    %dst:gpr(p0) = \
    //        G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
    //        \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
    // where %dst is updated, into
    //    (%Rd:GPR64common, %Rn:GPR64) = \
    //        MOPSMemorySetTaggingPseudo \
    //        %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
    // where Rd and Rn are tied.
    // It is expected that %val has been extended to s64 in legalization.
    // Note that the order of the size/value operands is swapped.

    Register DstDef = I.getOperand(0).getReg();
    // I.getOperand(1) is the intrinsic function
    Register DstUse = I.getOperand(2).getReg();
    Register ValUse = I.getOperand(3).getReg();
    Register SizeUse = I.getOperand(4).getReg();

    // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
    // Therefore an additional virtual register is required for the updated size
    // operand. This value is not accessible via the semantics of the intrinsic.
    Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));

    auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
                                 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
    Memset.cloneMemRefs(I);
    constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
    break;
  }
  }

  I.eraseFromParent();
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -699,8 +699,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
      .libcall();
  if (ST.hasMOPS()) {
    // G_BZERO is not supported. Currently it is only emitted by
    // PreLegalizerCombiner for G_MEMSET with zero constant.
    getActionDefinitionsBuilder(G_BZERO).unsupported();

    getActionDefinitionsBuilder(G_MEMSET)
        .legalForCartesianProduct({p0}, {s64}, {s64})
        .customForCartesianProduct({p0}, {s8}, {s64})
        .immIdx(0); // Inform verifier imm idx 0 is handled.

    getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
        .legalForCartesianProduct({p0}, {p0}, {s64})
        .immIdx(0); // Inform verifier imm idx 0 is handled.

    // G_MEMCPY_INLINE does not have a tailcall immediate
    getActionDefinitionsBuilder(G_MEMCPY_INLINE)
        .legalForCartesianProduct({p0}, {p0}, {s64});

  } else {
    getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
        .libcall();
  }

  // FIXME: Legal types are only legal with NEON.
  getActionDefinitionsBuilder(G_ABS)
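The custom G_MEMSET rule above fires for the natural {p0, s8, s64} form; legalizeMemOps (further below) then any-extends the s8 value to s64 so it matches the pseudo, which reads the value from a full 64-bit register. A hand-written sketch in the style of the tests (illustrative name and registers, not checked in by this commit):

declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)

define void @memset_value_mops_sketch(i8* %dst, i64 %n, i32 %value) {
entry:
  ; The s8 value is widened to s64, so the SET expansion reads x2, roughly:
  ;   setp [x0]!, x1!, x2
  ;   setm [x0]!, x1!, x2
  ;   sete [x0]!, x1!, x2
  %value_trunc = trunc i32 %value to i8
  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %value_trunc, i64 %n, i1 false)
  ret void
}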
@@ -832,6 +852,11 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
    return legalizeAtomicCmpxchg128(MI, MRI, Helper);
  case TargetOpcode::G_CTTZ:
    return legalizeCTTZ(MI, Helper);
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  }

  llvm_unreachable("expected switch to return");
@@ -989,6 +1014,15 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::aarch64_mops_memset_tag: {
    assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
    // Anyext the value to 64 bit
    MachineIRBuilder MIB(MI);
    auto &Value = MI.getOperand(3);
    Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
    Value.setReg(ZExtValueReg);
    return true;
  }
  }

  return true;
@@ -1359,3 +1393,20 @@ bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
                                          LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;

  // The tagged version (MOPSMemorySetTagging) is legalised in legalizeIntrinsic
  if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
    // Anyext the value operand to 64 bit
    auto &Value = MI.getOperand(1);
    Register ZExtValueReg =
        MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
    Value.setReg(ZExtValueReg);
    return true;
  }

  return false;
}
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -56,6 +56,7 @@ private:
  bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI,
                                LegalizerHelper &Helper) const;
  bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
  bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
  const AArch64Subtarget *ST;
};
} // End llvm namespace.
llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll (new file)
@@ -0,0 +1,243 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel-O0
; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel

; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly
declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64)

define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_0_zeroval:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    mov x8, xzr
; GISel-O0-NEXT:    setgp [x0]!, x8!, x8
; GISel-O0-NEXT:    setgm [x0]!, x8!, x8
; GISel-O0-NEXT:    setge [x0]!, x8!, x8
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_0_zeroval:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov x8, xzr
; GISel-NEXT:    setgp [x0]!, x8!, xzr
; GISel-NEXT:    setgm [x0]!, x8!, xzr
; GISel-NEXT:    setge [x0]!, x8!, xzr
; GISel-NEXT:    ret
entry:
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 0)
  ret i8* %r
}

define i8* @memset_tagged_1_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_1_zeroval:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    mov x9, xzr
; GISel-O0-NEXT:    mov w8, #1
; GISel-O0-NEXT:    // kill: def $x8 killed $w8
; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
; GISel-O0-NEXT:    setge [x0]!, x8!, x9
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_1_zeroval:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov w8, #1
; GISel-NEXT:    setgp [x0]!, x8!, xzr
; GISel-NEXT:    setgm [x0]!, x8!, xzr
; GISel-NEXT:    setge [x0]!, x8!, xzr
; GISel-NEXT:    ret
entry:
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 1)
  ret i8* %r
}

define i8* @memset_tagged_10_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_10_zeroval:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    mov x9, xzr
; GISel-O0-NEXT:    mov w8, #10
; GISel-O0-NEXT:    // kill: def $x8 killed $w8
; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
; GISel-O0-NEXT:    setge [x0]!, x8!, x9
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_10_zeroval:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov w8, #10
; GISel-NEXT:    setgp [x0]!, x8!, xzr
; GISel-NEXT:    setgm [x0]!, x8!, xzr
; GISel-NEXT:    setge [x0]!, x8!, xzr
; GISel-NEXT:    ret
entry:
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10)
  ret i8* %r
}

define i8* @memset_tagged_10000_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_10000_zeroval:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    mov x9, xzr
; GISel-O0-NEXT:    mov w8, #10000
; GISel-O0-NEXT:    // kill: def $x8 killed $w8
; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
; GISel-O0-NEXT:    setge [x0]!, x8!, x9
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_10000_zeroval:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov w8, #10000
; GISel-NEXT:    setgp [x0]!, x8!, xzr
; GISel-NEXT:    setgm [x0]!, x8!, xzr
; GISel-NEXT:    setge [x0]!, x8!, xzr
; GISel-NEXT:    ret
entry:
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10000)
  ret i8* %r
}

define i8* @memset_tagged_size_zeroval(i8* %dst, i64 %size) {
; GISel-O0-LABEL: memset_tagged_size_zeroval:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    mov x8, xzr
; GISel-O0-NEXT:    setgp [x0]!, x1!, x8
; GISel-O0-NEXT:    setgm [x0]!, x1!, x8
; GISel-O0-NEXT:    setge [x0]!, x1!, x8
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_size_zeroval:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    setgp [x0]!, x1!, xzr
; GISel-NEXT:    setgm [x0]!, x1!, xzr
; GISel-NEXT:    setge [x0]!, x1!, xzr
; GISel-NEXT:    ret
entry:
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 %size)
  ret i8* %r
}

define i8* @memset_tagged_0(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_0:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    // implicit-def: $x9
; GISel-O0-NEXT:    mov w9, w2
; GISel-O0-NEXT:    mov x8, xzr
; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
; GISel-O0-NEXT:    setge [x0]!, x8!, x9
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_0:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov x8, xzr
; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
; GISel-NEXT:    setgp [x0]!, x8!, x2
; GISel-NEXT:    setgm [x0]!, x8!, x2
; GISel-NEXT:    setge [x0]!, x8!, x2
; GISel-NEXT:    ret
entry:
  %value_trunc = trunc i32 %value to i8
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 0)
  ret i8* %r
}

define i8* @memset_tagged_1(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_1:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    // implicit-def: $x9
; GISel-O0-NEXT:    mov w9, w2
; GISel-O0-NEXT:    mov w8, #1
; GISel-O0-NEXT:    // kill: def $x8 killed $w8
; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
; GISel-O0-NEXT:    setge [x0]!, x8!, x9
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_1:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov w8, #1
; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
; GISel-NEXT:    setgp [x0]!, x8!, x2
; GISel-NEXT:    setgm [x0]!, x8!, x2
; GISel-NEXT:    setge [x0]!, x8!, x2
; GISel-NEXT:    ret
entry:
  %value_trunc = trunc i32 %value to i8
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 1)
  ret i8* %r
}

define i8* @memset_tagged_10(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_10:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    // implicit-def: $x9
; GISel-O0-NEXT:    mov w9, w2
; GISel-O0-NEXT:    mov w8, #10
; GISel-O0-NEXT:    // kill: def $x8 killed $w8
; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
; GISel-O0-NEXT:    setge [x0]!, x8!, x9
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_10:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov w8, #10
; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
; GISel-NEXT:    setgp [x0]!, x8!, x2
; GISel-NEXT:    setgm [x0]!, x8!, x2
; GISel-NEXT:    setge [x0]!, x8!, x2
; GISel-NEXT:    ret
entry:
  %value_trunc = trunc i32 %value to i8
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10)
  ret i8* %r
}

define i8* @memset_tagged_10000(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_10000:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    // implicit-def: $x9
; GISel-O0-NEXT:    mov w9, w2
; GISel-O0-NEXT:    mov w8, #10000
; GISel-O0-NEXT:    // kill: def $x8 killed $w8
; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
; GISel-O0-NEXT:    setge [x0]!, x8!, x9
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_10000:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    mov w8, #10000
; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
; GISel-NEXT:    setgp [x0]!, x8!, x2
; GISel-NEXT:    setgm [x0]!, x8!, x2
; GISel-NEXT:    setge [x0]!, x8!, x2
; GISel-NEXT:    ret
entry:
  %value_trunc = trunc i32 %value to i8
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10000)
  ret i8* %r
}

define i8* @memset_tagged_size(i8* %dst, i64 %size, i32 %value) {
; GISel-O0-LABEL: memset_tagged_size:
; GISel-O0:       // %bb.0: // %entry
; GISel-O0-NEXT:    // implicit-def: $x8
; GISel-O0-NEXT:    mov w8, w2
; GISel-O0-NEXT:    setgp [x0]!, x1!, x8
; GISel-O0-NEXT:    setgm [x0]!, x1!, x8
; GISel-O0-NEXT:    setge [x0]!, x1!, x8
; GISel-O0-NEXT:    ret
;
; GISel-LABEL: memset_tagged_size:
; GISel:       // %bb.0: // %entry
; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
; GISel-NEXT:    setgp [x0]!, x1!, x2
; GISel-NEXT:    setgm [x0]!, x1!, x2
; GISel-NEXT:    setge [x0]!, x1!, x2
; GISel-NEXT:    ret
entry:
  %value_trunc = trunc i32 %value to i8
  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 %size)
  ret i8* %r
}
(File diff suppressed because it is too large.)
llvm/unittests/Target/AArch64/InstSizes.cpp
@@ -155,3 +155,17 @@ TEST(InstSizes, TLSDESC_CALLSEQ) {
    EXPECT_EQ(16u, II.getInstSizeInBytes(*I));
  });
}

TEST(InstSizes, MOPSMemorySetTaggingPseudo) {
  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine();
  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());

  runChecks(TM.get(), II.get(), "",
            "    renamable $x0, dead renamable $x1 = MOPSMemorySetTaggingPseudo "
            "killed renamable $x0, killed renamable $x1, killed renamable $x2, "
            "implicit-def dead $nzcv\n",
            [](AArch64InstrInfo &II, MachineFunction &MF) {
              auto I = MF.begin()->begin();
              EXPECT_EQ(12u, II.getInstSizeInBytes(*I));
            });
}