[X86][AMX] Don't emit tilerelease for old AMX intrinsic.

We should avoid mixing the old AMX intrinsics with the new AMX intrinsics. For
the old AMX intrinsics, the user is responsible for invoking tile release. This
patch checks whether any tile config is generated by the compiler. If
so, it emits the tilerelease instruction; otherwise it doesn't emit the
instruction.

Differential Revision: https://reviews.llvm.org/D114066
This commit is contained in:
Luo, Yuanke 2021-11-17 16:22:53 +08:00
parent 0623f52a46
commit c4dba47196
7 changed files with 16 additions and 10 deletions

View File

@ -44,6 +44,7 @@ class X86FastTileConfig : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
X86MachineFunctionInfo *X86FI = nullptr;
MachineInstr *getTileConfigPoint();
void tileConfig();
@ -289,6 +290,8 @@ bool X86FastTileConfig::fastTileConfig() {
if (!CFGs.empty())
Changed = true;
}
if (Changed)
X86FI->setHasVirtualTileReg(true);
return Changed;
}
@ -298,6 +301,7 @@ bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
ST = &MFunc.getSubtarget<X86Subtarget>();
TRI = ST->getRegisterInfo();
TII = MFunc.getSubtarget().getInstrInfo();
X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
return fastTileConfig();
}

View File

@ -2219,13 +2219,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
// Emit tilerelease for AMX kernel.
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
for (unsigned I = 0; I < RC->getNumRegs(); I++)
if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) {
BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
break;
}
if (X86FI->hasVirtualTileReg())
BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
}
StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,

View File

@ -113,6 +113,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// other tools to detect the extended record.
bool HasSwiftAsyncContext = false;
/// True if this function has a tile virtual register. This is used to
/// determine whether we should insert tilerelease in frame lowering.
bool HasVirtualTileReg = false;
Optional<int> SwiftAsyncContextFrameIdx;
ValueMap<const Value *, size_t> PreallocatedIds;
@ -207,6 +211,9 @@ public:
/// Returns the current value of the HasSwiftAsyncContext flag.
bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
/// Sets the HasSwiftAsyncContext flag to \p v.
void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
/// Returns the HasVirtualTileReg flag, i.e. whether this function has been
/// marked as having a tile virtual register. Frame lowering reads this to
/// decide whether to emit tilerelease in the epilogue.
bool hasVirtualTileReg() const { return HasVirtualTileReg; }
/// Sets the HasVirtualTileReg flag to \p v. The tile config passes set it to
/// true when they generate a tile configuration for the function.
void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
/// Returns a copy of the stored SwiftAsyncContextFrameIdx value, which is an
/// Optional<int>.
Optional<int> getSwiftAsyncContextFrameIdx() const {
return SwiftAsyncContextFrameIdx;
}

View File

@ -25,6 +25,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -235,6 +236,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
const TargetInstrInfo *TII = ST.getInstrInfo();
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
BitVector AMXRegs(TRI->getNumRegs());
for (unsigned I = 0; I < RC->getNumRegs(); I++)
@ -294,6 +296,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
// There's no AMX instruction if we didn't find a tile config live in point.
if (CfgNeedInsert.empty())
return false;
X86FI->setHasVirtualTileReg(true);
// Avoid inserting ldtilecfg before any shape defs.
SmallVector<MachineBasicBlock *, 8> WorkList;

View File

@ -5,7 +5,6 @@ define void @test_amx() {
; CHECK-LABEL: test_amx:
; CHECK: # %bb.0:
; CHECK-NEXT: tdpbf16ps %tmm7, %tmm4, %tmm3
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
call void @llvm.x86.tdpbf16ps(i8 3, i8 4, i8 7)
ret void

View File

@ -8,7 +8,6 @@ define void @test_amx() {
; CHECK-NEXT: tdpbsud %tmm7, %tmm4, %tmm3
; CHECK-NEXT: tdpbusd %tmm7, %tmm0, %tmm3
; CHECK-NEXT: tdpbuud %tmm1, %tmm4, %tmm3
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
call void @llvm.x86.tdpbssd(i8 3, i8 4, i8 7)

View File

@ -11,7 +11,6 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm3
; CHECK-NEXT: tileloaddt1 (%rsi,%rdx), %tmm3
; CHECK-NEXT: tilestored %tmm3, (%rsi,%rdx)
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
call void @llvm.x86.ldtilecfg(i8* %pointer)