[AArch64][GlobalISel] Don't contract cross-bank copies into truncating stores.

Truncating stores with GPR bank sources shouldn't be mutated into using FPR bank
sources, since those aren't supported.

Ideally this should be a selection failure in the tablegen patterns, but for now
avoid generating them.
This commit is contained in:
Amara Emerson 2021-08-20 16:23:23 -07:00
parent 2bd7c30e5a
commit 67bf3ac744
2 changed files with 75 additions and 37 deletions
llvm
lib/Target/AArch64/GISel
test/CodeGen/AArch64/GlobalISel

View File

@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@ -104,7 +105,7 @@ private:
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
bool contractCrossBankCopyIntoStore(MachineInstr &I,
bool contractCrossBankCopyIntoStore(GStore &I,
MachineRegisterInfo &MRI);
bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
@ -1934,8 +1935,9 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
return true;
}
case TargetOpcode::G_STORE: {
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
MachineOperand &SrcOp = I.getOperand(0);
auto &StoreMI = cast<GStore>(I);
bool Changed = contractCrossBankCopyIntoStore(StoreMI, MRI);
MachineOperand &SrcOp = StoreMI.getOperand(0);
if (MRI.getType(SrcOp.getReg()).isPointer()) {
// Allow matching with imported patterns for stores of pointers. Unlike
// G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
@ -1946,6 +1948,28 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
Changed = true;
}
#if 0
// Now look for truncating stores to the FPR bank. We don't support these,
// but since truncating store formation happens before RBS, we can only
// split them up again here. We don't want to assign truncstores to GPR only
// since that would have a perf impact due to extra moves.
LLT SrcTy = MRI.getType(SrcReg);
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
if (SrcTy.isScalar() &&
SrcTy.getSizeInBits() > StoreMI.getMemSizeInBits()) {
// Generate an explicit truncate and make this into a non-truncating
// store.
auto Trunc =
MIB.buildTrunc(LLT::scalar(StoreMI.getMemSizeInBits()), SrcReg);
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
if (!select(*Trunc)) {
return false;
}
SrcOp.setReg(Trunc.getReg(0));
return true;
}
}
#endif
return Changed;
}
case TargetOpcode::G_PTR_ADD:
@ -2081,8 +2105,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
}
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
GStore &StoreMI, MachineRegisterInfo &MRI) {
// If we're storing a scalar, it doesn't matter what register bank that
// scalar is on. All that matters is the size.
//
@ -2097,11 +2120,11 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
// G_STORE %x:gpr(s32)
//
// And then continue the selection process normally.
Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
Register DefDstReg = getSrcRegIgnoringCopies(StoreMI.getValueReg(), MRI);
if (!DefDstReg.isValid())
return false;
LLT DefDstTy = MRI.getType(DefDstReg);
Register StoreSrcReg = I.getOperand(0).getReg();
Register StoreSrcReg = StoreMI.getValueReg();
LLT StoreSrcTy = MRI.getType(StoreSrcReg);
// If we get something strange like a physical register, then we shouldn't
@ -2113,12 +2136,16 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
return false;
// Is this store a truncating one?
if (StoreSrcTy.getSizeInBits() != StoreMI.getMemSizeInBits())
return false;
if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
RBI.getRegBank(DefDstReg, MRI, TRI))
return false;
// We have a cross-bank copy, which is entering a store. Let's fold it.
I.getOperand(0).setReg(DefDstReg);
StoreMI.getOperand(0).setReg(DefDstReg);
return true;
}

View File

@ -1,15 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
define void @contract_s64_gpr(i64* %addr) { ret void }
define void @contract_s32_gpr(i32* %addr) { ret void }
define void @contract_s64_fpr(i64* %addr) { ret void }
define void @contract_s32_fpr(i32* %addr) { ret void }
define void @contract_s16_fpr(i16* %addr) { ret void }
define void @contract_g_unmerge_values_first(i128* %addr) { ret void }
define void @contract_g_unmerge_values_second(i128* %addr) { ret void }
...
---
name: contract_s64_gpr
legalized: true
@ -20,11 +11,11 @@ body: |
; CHECK-LABEL: name: contract_s64_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%2:fpr(s64) = COPY %1
G_STORE %2:fpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %2:fpr(s64), %0 :: (store (s64))
...
---
name: contract_s32_gpr
@ -36,11 +27,11 @@ body: |
; CHECK-LABEL: name: contract_s32_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32))
%0:gpr(p0) = COPY $x0
%1:gpr(s32) = COPY $w1
%2:fpr(s32) = COPY %1
G_STORE %2:fpr(s32), %0 :: (store (s32) into %ir.addr)
G_STORE %2:fpr(s32), %0 :: (store (s32))
...
---
name: contract_s64_fpr
@ -52,11 +43,11 @@ body: |
; CHECK-LABEL: name: contract_s64_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:fpr(s64) = COPY $d1
%2:gpr(s64) = COPY %1
G_STORE %2:gpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %2:gpr(s64), %0 :: (store (s64))
...
---
name: contract_s32_fpr
@ -68,11 +59,11 @@ body: |
; CHECK-LABEL: name: contract_s32_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32))
%0:gpr(p0) = COPY $x0
%1:fpr(s32) = COPY $s1
%2:gpr(s32) = COPY %1
G_STORE %2:gpr(s32), %0 :: (store (s32) into %ir.addr)
G_STORE %2:gpr(s32), %0 :: (store (s32))
...
---
name: contract_s16_fpr
@ -84,11 +75,11 @@ body: |
; CHECK-LABEL: name: contract_s16_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr)
; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16))
%0:gpr(p0) = COPY $x0
%1:fpr(s16) = COPY $h1
%2:gpr(s16) = COPY %1
G_STORE %2:gpr(s16), %0 :: (store (s16) into %ir.addr)
G_STORE %2:gpr(s16), %0 :: (store (s16))
...
---
name: contract_g_unmerge_values_first
@ -99,15 +90,16 @@ body: |
liveins: $x0, $x1
; CHECK-LABEL: name: contract_g_unmerge_values_first
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
%4:gpr(s64) = COPY %2
%5:gpr(s64) = COPY %3
G_STORE %4:gpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %4:gpr(s64), %0 :: (store (s64))
...
---
name: contract_g_unmerge_values_second
@ -118,12 +110,31 @@ body: |
liveins: $x0, $x1
; CHECK-LABEL: name: contract_g_unmerge_values_second
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
; CHECK: STRDui [[CPYi64_]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
%4:gpr(s64) = COPY %2
%5:gpr(s64) = COPY %3
G_STORE %5:gpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %5:gpr(s64), %0 :: (store (s64))
...
---
name: contract_s16_truncstore
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0, $s1
; CHECK-LABEL: name: contract_s16_truncstore
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store (s16))
%0:gpr(p0) = COPY $x0
%1:fpr(s32) = COPY $s1
%2:gpr(s32) = COPY %1
G_STORE %2:gpr(s32), %0 :: (store (s16))
...