From c4dba47196c280dcf3763ccb6133f3dec5285e78 Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Wed, 17 Nov 2021 16:22:53 +0800 Subject: [PATCH] [X86][AMX] Don't emit tilerelease for old AMX intrinsics. We should avoid mixing old AMX intrinsics with new AMX intrinsics. For old AMX intrinsics, the user is responsible for invoking tile release. This patch checks whether any tile config was generated by the compiler. If so, it emits the tilerelease instruction; otherwise it doesn't emit the instruction. Differential Revision: https://reviews.llvm.org/D114066 --- llvm/lib/Target/X86/X86FastTileConfig.cpp | 4 ++++ llvm/lib/Target/X86/X86FrameLowering.cpp | 9 ++------- llvm/lib/Target/X86/X86MachineFunctionInfo.h | 7 +++++++ llvm/lib/Target/X86/X86PreTileConfig.cpp | 3 +++ llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll | 1 - llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll | 1 - llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll | 1 - 7 files changed, 16 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp index 7031bd40215d..87c04a07cd13 100644 --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -44,6 +44,7 @@ class X86FastTileConfig : public MachineFunctionPass { const TargetRegisterInfo *TRI = nullptr; const TargetInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; + X86MachineFunctionInfo *X86FI = nullptr; MachineInstr *getTileConfigPoint(); void tileConfig(); @@ -289,6 +290,8 @@ bool X86FastTileConfig::fastTileConfig() { if (!CFGs.empty()) Changed = true; } + if (Changed) + X86FI->setHasVirtualTileReg(true); return Changed; } @@ -298,6 +301,7 @@ bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { ST = &MFunc.getSubtarget(); TRI = ST->getRegisterInfo(); TII = MFunc.getSubtarget().getInstrInfo(); + X86FI = MFunc.getInfo(); return fastTileConfig(); } diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp 
b/llvm/lib/Target/X86/X86FrameLowering.cpp index d84cbac9dc4b..bd780273509f 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2219,13 +2219,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } // Emit tilerelease for AMX kernel. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID); - for (unsigned I = 0; I < RC->getNumRegs(); I++) - if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) { - BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE)); - break; - } + if (X86FI->hasVirtualTileReg()) + BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE)); } StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 7f3c55f317c7..99d1a97380dd 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -113,6 +113,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// other tools to detect the extended record. bool HasSwiftAsyncContext = false; + /// True if this function has tile virtual register. This is used to + /// determine if we should insert tilerelease in frame lowering. 
+ bool HasVirtualTileReg = false; + Optional SwiftAsyncContextFrameIdx; ValueMap PreallocatedIds; @@ -207,6 +211,9 @@ public: bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; } void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; } + bool hasVirtualTileReg() const { return HasVirtualTileReg; } + void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; } + Optional getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; } diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp index 53aa8f99ff1e..5d21f8666ec6 100644 --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -25,6 +25,7 @@ #include "X86.h" #include "X86InstrBuilder.h" +#include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -235,6 +236,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) { const TargetInstrInfo *TII = ST.getInstrInfo(); const TargetRegisterInfo *TRI = ST.getRegisterInfo(); const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID); + X86MachineFunctionInfo *X86FI = MF.getInfo(); BitVector AMXRegs(TRI->getNumRegs()); for (unsigned I = 0; I < RC->getNumRegs(); I++) @@ -294,6 +296,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) { // There's no AMX instruction if we didn't find a tile config live in point. if (CfgNeedInsert.empty()) return false; + X86FI->setHasVirtualTileReg(true); // Avoid to insert ldtilecfg before any shape defs. 
SmallVector WorkList; diff --git a/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll index a59b05669e61..a415d9c15242 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll @@ -5,7 +5,6 @@ define void @test_amx() { ; CHECK-LABEL: test_amx: ; CHECK: # %bb.0: ; CHECK-NEXT: tdpbf16ps %tmm7, %tmm4, %tmm3 -; CHECK-NEXT: tilerelease ; CHECK-NEXT: retq call void @llvm.x86.tdpbf16ps(i8 3, i8 4, i8 7) ret void diff --git a/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll index fa811f02bd9a..2bbf4d9edb91 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll @@ -8,7 +8,6 @@ define void @test_amx() { ; CHECK-NEXT: tdpbsud %tmm7, %tmm4, %tmm3 ; CHECK-NEXT: tdpbusd %tmm7, %tmm0, %tmm3 ; CHECK-NEXT: tdpbuud %tmm1, %tmm4, %tmm3 -; CHECK-NEXT: tilerelease ; CHECK-NEXT: retq call void @llvm.x86.tdpbssd(i8 3, i8 4, i8 7) diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll index 9792f8b19af4..4d469c23328e 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll @@ -11,7 +11,6 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) { ; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm3 ; CHECK-NEXT: tileloaddt1 (%rsi,%rdx), %tmm3 ; CHECK-NEXT: tilestored %tmm3, (%rsi,%rdx) -; CHECK-NEXT: tilerelease ; CHECK-NEXT: retq call void @llvm.x86.ldtilecfg(i8* %pointer)