forked from OSchip/llvm-project
[X86][AMX] Don't emit tilerelease for old AMX intrinsics.
We should avoid mixing the old AMX intrinsics with the new AMX intrinsics. For the old AMX intrinsics, the user is responsible for invoking tilerelease. This patch checks whether any tile config was generated by the compiler. If so, it emits the tilerelease instruction; otherwise it does not emit the instruction. Differential Revision: https://reviews.llvm.org/D114066
This commit is contained in:
parent
0623f52a46
commit
c4dba47196
|
@ -44,6 +44,7 @@ class X86FastTileConfig : public MachineFunctionPass {
|
|||
const TargetRegisterInfo *TRI = nullptr;
|
||||
const TargetInstrInfo *TII = nullptr;
|
||||
MachineRegisterInfo *MRI = nullptr;
|
||||
X86MachineFunctionInfo *X86FI = nullptr;
|
||||
|
||||
MachineInstr *getTileConfigPoint();
|
||||
void tileConfig();
|
||||
|
@ -289,6 +290,8 @@ bool X86FastTileConfig::fastTileConfig() {
|
|||
if (!CFGs.empty())
|
||||
Changed = true;
|
||||
}
|
||||
if (Changed)
|
||||
X86FI->setHasVirtualTileReg(true);
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
@ -298,6 +301,7 @@ bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
|
|||
ST = &MFunc.getSubtarget<X86Subtarget>();
|
||||
TRI = ST->getRegisterInfo();
|
||||
TII = MFunc.getSubtarget().getInstrInfo();
|
||||
X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
|
||||
|
||||
return fastTileConfig();
|
||||
}
|
||||
|
|
|
@ -2219,13 +2219,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
}
|
||||
|
||||
// Emit tilerelease for AMX kernel.
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
|
||||
for (unsigned I = 0; I < RC->getNumRegs(); I++)
|
||||
if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) {
|
||||
BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
|
||||
break;
|
||||
}
|
||||
if (X86FI->hasVirtualTileReg())
|
||||
BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
|
||||
}
|
||||
|
||||
StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
||||
|
|
|
@ -113,6 +113,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||
/// other tools to detect the extended record.
|
||||
bool HasSwiftAsyncContext = false;
|
||||
|
||||
/// True if this function has tile virtual register. This is used to
|
||||
/// determine if we should insert tilerelease in frame lowering.
|
||||
bool HasVirtualTileReg = false;
|
||||
|
||||
Optional<int> SwiftAsyncContextFrameIdx;
|
||||
|
||||
ValueMap<const Value *, size_t> PreallocatedIds;
|
||||
|
@ -207,6 +211,9 @@ public:
|
|||
bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
|
||||
void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
|
||||
|
||||
bool hasVirtualTileReg() const { return HasVirtualTileReg; }
|
||||
void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
|
||||
|
||||
Optional<int> getSwiftAsyncContextFrameIdx() const {
|
||||
return SwiftAsyncContextFrameIdx;
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "X86.h"
|
||||
#include "X86InstrBuilder.h"
|
||||
#include "X86MachineFunctionInfo.h"
|
||||
#include "X86RegisterInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
@ -235,6 +236,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
|
|||
const TargetInstrInfo *TII = ST.getInstrInfo();
|
||||
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
|
||||
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
|
||||
BitVector AMXRegs(TRI->getNumRegs());
|
||||
for (unsigned I = 0; I < RC->getNumRegs(); I++)
|
||||
|
@ -294,6 +296,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
|
|||
// There's no AMX instruction if we didn't find a tile config live in point.
|
||||
if (CfgNeedInsert.empty())
|
||||
return false;
|
||||
X86FI->setHasVirtualTileReg(true);
|
||||
|
||||
// Avoid to insert ldtilecfg before any shape defs.
|
||||
SmallVector<MachineBasicBlock *, 8> WorkList;
|
||||
|
|
|
@ -5,7 +5,6 @@ define void @test_amx() {
|
|||
; CHECK-LABEL: test_amx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: tdpbf16ps %tmm7, %tmm4, %tmm3
|
||||
; CHECK-NEXT: tilerelease
|
||||
; CHECK-NEXT: retq
|
||||
call void @llvm.x86.tdpbf16ps(i8 3, i8 4, i8 7)
|
||||
ret void
|
||||
|
|
|
@ -8,7 +8,6 @@ define void @test_amx() {
|
|||
; CHECK-NEXT: tdpbsud %tmm7, %tmm4, %tmm3
|
||||
; CHECK-NEXT: tdpbusd %tmm7, %tmm0, %tmm3
|
||||
; CHECK-NEXT: tdpbuud %tmm1, %tmm4, %tmm3
|
||||
; CHECK-NEXT: tilerelease
|
||||
; CHECK-NEXT: retq
|
||||
call void @llvm.x86.tdpbssd(i8 3, i8 4, i8 7)
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
|
|||
; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm3
|
||||
; CHECK-NEXT: tileloaddt1 (%rsi,%rdx), %tmm3
|
||||
; CHECK-NEXT: tilestored %tmm3, (%rsi,%rdx)
|
||||
; CHECK-NEXT: tilerelease
|
||||
; CHECK-NEXT: retq
|
||||
call void @llvm.x86.ldtilecfg(i8* %pointer)
|
||||
|
||||
|
|
Loading…
Reference in New Issue