From e9c11c1934f90725673f8a808fe6fcc390224a87 Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Thu, 18 Feb 2021 11:35:34 +0800 Subject: [PATCH] [X86] Zero AMX config buffer for non AVX512 cases. Zero AMX config buffer for non AVX512 cases. Differential Revision: https://reviews.llvm.org/D96927 --- llvm/lib/Target/X86/X86PreTileConfig.cpp | 32 ++++++++++++++++++++++-- llvm/test/CodeGen/X86/AMX/amx-config.ll | 9 +++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp index d8709f98adf9..432e1fe2b694 100644 --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -103,9 +103,8 @@ static void buildConfigMI(MachineBasicBlock::iterator MI, int FrameIdx, const X86Subtarget *ST) { auto *MBB = MI->getParent(); - // FIXME: AMX should assume AVX512 enabled. + // Zero stack slot. if (ST->hasAVX512()) { - // Zero stack slot. Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass); BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORDZrr), Zmm) .addReg(Zmm, RegState::Undef) @@ -113,6 +112,35 @@ static void buildConfigMI(MachineBasicBlock::iterator MI, int FrameIdx, addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSZmr)), FrameIdx) .addReg(Zmm); + } else if (ST->hasAVX2()) { + Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass); + BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORYrr), Ymm) + .addReg(Ymm, RegState::Undef) + .addReg(Ymm, RegState::Undef); + addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSYmr)), + FrameIdx) + .addReg(Ymm); + addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSYmr)), + FrameIdx, 32) + .addReg(Ymm); + } else { + assert(ST->hasSSE2() && "AMX should assume SSE2 enabled"); + Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass); + BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::PXORrr), Xmm) + .addReg(Xmm, RegState::Undef) + .addReg(Xmm, RegState::Undef); + addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)), + FrameIdx) + .addReg(Xmm); + addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)), + FrameIdx, 16) + .addReg(Xmm); + addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)), + FrameIdx, 32) + .addReg(Xmm); + addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)), + FrameIdx, 48) + .addReg(Xmm); } // build psuedo ldtilecfg diff --git a/llvm/test/CodeGen/X86/AMX/amx-config.ll b/llvm/test/CodeGen/X86/AMX/amx-config.ll index 453ce113c34b..5d0429927c16 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-config.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-config.ll @@ -45,6 +45,9 @@ define dso_local void @test_api(i32 %0, i16 signext %1, i16 signext %2) { ; AVX2: # %bb.0: ; AVX2-NEXT: testl %edi, %edi ; AVX2-NEXT: movsbl %sil, %eax +; AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb $1, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movw %si, -{{[0-9]+}}(%rsp) @@ -69,12 +72,18 @@ define dso_local void @test_api(i32 %0, i16 signext %1, i16 signext %2) { ; AVX2-NEXT: movl $32, %esi ; AVX2-NEXT: tilestored %tmm1, (%rcx,%rsi) ; AVX2-NEXT: tilerelease +; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; SSE2-LABEL: test_api: ; SSE2: # %bb.0: ; SSE2-NEXT: testl %edi, %edi ; SSE2-NEXT: movsbl %sil, %eax +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: movb $1, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: movw %si, -{{[0-9]+}}(%rsp)