diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index cc31c322c12b..1a31ffa9d787 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -48,8 +48,7 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
                        VEX, T8XD;
 
     // Pseduo instruction for RA.
-    let isReMaterializable = 1, canFoldAsLoad = 1 in
-    def PTILELOADDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+    def PTILELOADDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                                                       GR16:$src2,
                                                       opaquemem:$src3), []>;
     def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 745bf435b0c2..2b8ef4d9347e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1007,7 +1007,6 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case X86::MOV64ri32:
   case X86::MOV8ri:
   case X86::PTILEZEROV:
-  case X86::PTILELOADDV:
     return true;
 
   case X86::MOV8rm:
diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
index fa097164fdc1..b687d03f92ba 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
@@ -131,10 +131,13 @@ define dso_local i32 @test_loop(i32 %0) nounwind {
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB2_2: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    tileloadd (%r15,%r12), %tmm0
+; CHECK-NEXT:    movabsq $64, %rax
+; CHECK-NEXT:    tilestored %tmm0, 1024(%rsp,%rax) # 1024-byte Folded Spill
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    callq foo
 ; CHECK-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    tileloadd (%r15,%r12), %tmm0
+; CHECK-NEXT:    movabsq $64, %rax
+; CHECK-NEXT:    tileloadd 1024(%rsp,%rax), %tmm0 # 1024-byte Folded Reload
 ; CHECK-NEXT:    tilestored %tmm0, (%r13,%r12)
 ; CHECK-NEXT:    callq foo
 ; CHECK-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)