[RISCV] Implement COPY for Zvlsseg registers

When copying Zvlsseg register tuples, we split the COPY into NF whole-register
moves, as shown below:

  $v10m2_v12m2 = COPY $v4m2_v6m2 # NF = 2
=>
  $v10m2 = PseudoVMV2R_V $v4m2
  $v12m2 = PseudoVMV2R_V $v6m2

This patch copies forwardCopyWillClobberTuple from AArch64 to check for
overlap between the source and destination register tuples.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D100280
This commit is contained in:
ShihPo Hung 2021-04-11 19:34:10 -07:00
parent 5c500c9f01
commit d5e962f1f2
2 changed files with 381 additions and 14 deletions

View File

@ -110,6 +110,13 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
/// Return true if copying a register tuple forward (lowest register first)
/// would overwrite part of the source tuple before it has been read.
/// Register encodings are compared modulo 32, the size of the vector
/// register file, so a wrapped tuple is handled correctly.
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // Positive distance from SrcReg up to DstReg, wrapping at 32. Masking with
  // 0x1f gives the positive remainder mod 32 even when DstReg < SrcReg.
  unsigned Distance = (DstReg - SrcReg) & 0x1f;
  return Distance < NumRegs;
}
// NOTE(review): this span is a rendered diff hunk with the +/- markers
// stripped, so pre-patch (removed) and post-patch (added) lines are
// interleaved -- e.g. IsScalableVector appears initialized both false and
// true below, and an unbraced `else` collides with the braced `else if`
// chain that follows it. It is not compilable as shown; the comments below
// describe the apparent post-patch intent. Confirm against upstream
// RISCVInstrInfo.cpp.
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
@ -123,35 +130,113 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// FPR->FPR copies and VR->VR copies.
unsigned Opc;
// Pre-patch (removed): scalable was opt-in per register class.
bool IsScalableVector = false;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg))
// Post-patch: scalable is the default; the scalar FP cases opt out below.
bool IsScalableVector = true;
// NF is the number of fields in a Zvlsseg tuple (1 for a plain copy);
// LMul is the register-group size of each field.
unsigned NF = 1;
unsigned LMul = 1;
unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_H;
else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg))
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_S;
else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg))
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_D;
else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
IsScalableVector = false;
} else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
IsScalableVector = true;
} else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
IsScalableVector = true;
} else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
IsScalableVector = true;
} else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV8R_V;
IsScalableVector = true;
} else
// Zvlsseg tuple classes, VRN<NF>M<LMul>: each tuple copy is lowered to NF
// whole-register moves of LMul registers each, via PseudoVMV<LMul>R_V.
} else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 2;
LMul = 1;
} else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 2;
LMul = 2;
} else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
SubRegIdx = RISCV::sub_vrm4_0;
NF = 2;
LMul = 4;
} else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 3;
LMul = 1;
} else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 3;
LMul = 2;
} else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 4;
LMul = 1;
} else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 4;
LMul = 2;
} else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 5;
LMul = 1;
} else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 6;
LMul = 1;
} else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 7;
LMul = 1;
} else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 8;
LMul = 1;
} else {
llvm_unreachable("Impossible reg-to-reg copy");
}
// Pre-patch emission (removed): one instruction for every scalable copy.
if (IsScalableVector)
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else
// Post-patch emission:
if (IsScalableVector) {
if (NF == 1) {
// Plain vector / whole register-group copy: a single pseudo move.
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
// Tuple copy: one whole-register move per field. If a low-to-high copy
// would overwrite source fields before they are read (overlapping
// tuples), iterate high-to-low instead.
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
int I = 0, End = NF, Incr = 1;
unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned DstEncoding = TRI->getEncodingValue(DstReg);
if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMul)) {
// Reverse iteration: fields NF-1 down to 0.
I = NF - 1;
End = -1;
Incr = -1;
}
for (; I != End; I += Incr) {
// Relies on the sub_vrm<LMul>_0..N sub-register indices being
// consecutive, so SubRegIdx + I selects the I-th field of the tuple.
BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I))
.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
getKillRegState(KillSrc));
}
}
} else {
// Scalar FP copy: FSGNJ with rs1 == rs2 acts as a register move, hence
// the same source register is added twice.
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(SrcReg, getKillRegState(KillSrc));
}
}
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,

View File

@ -0,0 +1,282 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple riscv64 -run-pass=postrapseudos %s -o - | FileCheck %s
# This test exercises COPY expansion for Zvlsseg register tuples: each COPY
# becomes NF PseudoVMV<LMul>R_V whole-register moves, emitted low-to-high
# normally and high-to-low when the destination tuple overlaps the source
# (see forwardCopyWillClobberTuple in RISCVInstrInfo.cpp).
# NOTE(review): the body lines below appear to have lost their leading
# indentation in extraction; MIR requires them indented under `body: |`.
...
---
name: copy_zvlsseg_N2
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N2
; CHECK: $v2 = PseudoVMV1R_V $v4
; CHECK: $v3 = PseudoVMV1R_V $v5
; CHECK: $v3 = PseudoVMV1R_V $v4
; CHECK: $v4 = PseudoVMV1R_V $v5
; CHECK: $v6 = PseudoVMV1R_V $v5
; CHECK: $v5 = PseudoVMV1R_V $v4
; CHECK: $v6 = PseudoVMV1R_V $v4
; CHECK: $v7 = PseudoVMV1R_V $v5
; CHECK: $v0m2 = PseudoVMV2R_V $v4m2
; CHECK: $v2m2 = PseudoVMV2R_V $v6m2
; CHECK: $v2m2 = PseudoVMV2R_V $v4m2
; CHECK: $v4m2 = PseudoVMV2R_V $v6m2
; CHECK: $v8m2 = PseudoVMV2R_V $v6m2
; CHECK: $v6m2 = PseudoVMV2R_V $v4m2
; CHECK: $v8m2 = PseudoVMV2R_V $v4m2
; CHECK: $v10m2 = PseudoVMV2R_V $v6m2
; CHECK: $v0m4 = PseudoVMV4R_V $v8m4
; CHECK: $v4m4 = PseudoVMV4R_V $v12m4
; CHECK: $v4m4 = PseudoVMV4R_V $v8m4
; CHECK: $v8m4 = PseudoVMV4R_V $v12m4
; CHECK: $v16m4 = PseudoVMV4R_V $v12m4
; CHECK: $v12m4 = PseudoVMV4R_V $v8m4
; CHECK: $v16m4 = PseudoVMV4R_V $v8m4
; CHECK: $v20m4 = PseudoVMV4R_V $v12m4
$v2_v3 = COPY $v4_v5
$v3_v4 = COPY $v4_v5
$v5_v6 = COPY $v4_v5
$v6_v7 = COPY $v4_v5
$v0m2_v2m2 = COPY $v4m2_v6m2
$v2m2_v4m2 = COPY $v4m2_v6m2
$v6m2_v8m2 = COPY $v4m2_v6m2
$v8m2_v10m2 = COPY $v4m2_v6m2
$v0m4_v4m4 = COPY $v8m4_v12m4
$v4m4_v8m4 = COPY $v8m4_v12m4
$v12m4_v16m4 = COPY $v8m4_v12m4
$v16m4_v20m4 = COPY $v8m4_v12m4
...
---
name: copy_zvlsseg_N3
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N3
; CHECK: $v2 = PseudoVMV1R_V $v5
; CHECK: $v3 = PseudoVMV1R_V $v6
; CHECK: $v4 = PseudoVMV1R_V $v7
; CHECK: $v3 = PseudoVMV1R_V $v5
; CHECK: $v4 = PseudoVMV1R_V $v6
; CHECK: $v5 = PseudoVMV1R_V $v7
; CHECK: $v4 = PseudoVMV1R_V $v5
; CHECK: $v5 = PseudoVMV1R_V $v6
; CHECK: $v6 = PseudoVMV1R_V $v7
; CHECK: $v9 = PseudoVMV1R_V $v7
; CHECK: $v8 = PseudoVMV1R_V $v6
; CHECK: $v7 = PseudoVMV1R_V $v5
; CHECK: $v9 = PseudoVMV1R_V $v5
; CHECK: $v10 = PseudoVMV1R_V $v6
; CHECK: $v11 = PseudoVMV1R_V $v7
; CHECK: $v0m2 = PseudoVMV2R_V $v6m2
; CHECK: $v2m2 = PseudoVMV2R_V $v8m2
; CHECK: $v4m2 = PseudoVMV2R_V $v10m2
; CHECK: $v2m2 = PseudoVMV2R_V $v6m2
; CHECK: $v4m2 = PseudoVMV2R_V $v8m2
; CHECK: $v6m2 = PseudoVMV2R_V $v10m2
; CHECK: $v14m2 = PseudoVMV2R_V $v10m2
; CHECK: $v12m2 = PseudoVMV2R_V $v8m2
; CHECK: $v10m2 = PseudoVMV2R_V $v6m2
; CHECK: $v12m2 = PseudoVMV2R_V $v6m2
; CHECK: $v14m2 = PseudoVMV2R_V $v8m2
; CHECK: $v16m2 = PseudoVMV2R_V $v10m2
$v2_v3_v4 = COPY $v5_v6_v7
$v3_v4_v5 = COPY $v5_v6_v7
$v4_v5_v6 = COPY $v5_v6_v7
$v7_v8_v9 = COPY $v5_v6_v7
$v9_v10_v11 = COPY $v5_v6_v7
$v0m2_v2m2_v4m2 = COPY $v6m2_v8m2_v10m2
$v2m2_v4m2_v6m2 = COPY $v6m2_v8m2_v10m2
$v10m2_v12m2_v14m2 = COPY $v6m2_v8m2_v10m2
$v12m2_v14m2_v16m2 = COPY $v6m2_v8m2_v10m2
...
---
name: copy_zvlsseg_N4
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N4
; CHECK: $v6 = PseudoVMV1R_V $v10
; CHECK: $v7 = PseudoVMV1R_V $v11
; CHECK: $v8 = PseudoVMV1R_V $v12
; CHECK: $v9 = PseudoVMV1R_V $v13
; CHECK: $v7 = PseudoVMV1R_V $v10
; CHECK: $v8 = PseudoVMV1R_V $v11
; CHECK: $v9 = PseudoVMV1R_V $v12
; CHECK: $v10 = PseudoVMV1R_V $v13
; CHECK: $v16 = PseudoVMV1R_V $v13
; CHECK: $v15 = PseudoVMV1R_V $v12
; CHECK: $v14 = PseudoVMV1R_V $v11
; CHECK: $v13 = PseudoVMV1R_V $v10
; CHECK: $v14 = PseudoVMV1R_V $v10
; CHECK: $v15 = PseudoVMV1R_V $v11
; CHECK: $v16 = PseudoVMV1R_V $v12
; CHECK: $v17 = PseudoVMV1R_V $v13
; CHECK: $v2m2 = PseudoVMV2R_V $v10m2
; CHECK: $v4m2 = PseudoVMV2R_V $v12m2
; CHECK: $v6m2 = PseudoVMV2R_V $v14m2
; CHECK: $v8m2 = PseudoVMV2R_V $v16m2
; CHECK: $v4m2 = PseudoVMV2R_V $v10m2
; CHECK: $v6m2 = PseudoVMV2R_V $v12m2
; CHECK: $v8m2 = PseudoVMV2R_V $v14m2
; CHECK: $v10m2 = PseudoVMV2R_V $v16m2
; CHECK: $v22m2 = PseudoVMV2R_V $v16m2
; CHECK: $v20m2 = PseudoVMV2R_V $v14m2
; CHECK: $v18m2 = PseudoVMV2R_V $v12m2
; CHECK: $v16m2 = PseudoVMV2R_V $v10m2
; CHECK: $v18m2 = PseudoVMV2R_V $v10m2
; CHECK: $v20m2 = PseudoVMV2R_V $v12m2
; CHECK: $v22m2 = PseudoVMV2R_V $v14m2
; CHECK: $v24m2 = PseudoVMV2R_V $v16m2
$v6_v7_v8_v9 = COPY $v10_v11_v12_v13
$v7_v8_v9_v10 = COPY $v10_v11_v12_v13
$v13_v14_v15_v16 = COPY $v10_v11_v12_v13
$v14_v15_v16_v17 = COPY $v10_v11_v12_v13
$v2m2_v4m2_v6m2_v8m2 = COPY $v10m2_v12m2_v14m2_v16m2
$v4m2_v6m2_v8m2_v10m2 = COPY $v10m2_v12m2_v14m2_v16m2
$v16m2_v18m2_v20m2_v22m2 = COPY $v10m2_v12m2_v14m2_v16m2
$v18m2_v20m2_v22m2_v24m2 = COPY $v10m2_v12m2_v14m2_v16m2
...
---
name: copy_zvlsseg_N5
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N5
; CHECK: $v5 = PseudoVMV1R_V $v10
; CHECK: $v6 = PseudoVMV1R_V $v11
; CHECK: $v7 = PseudoVMV1R_V $v12
; CHECK: $v8 = PseudoVMV1R_V $v13
; CHECK: $v9 = PseudoVMV1R_V $v14
; CHECK: $v6 = PseudoVMV1R_V $v10
; CHECK: $v7 = PseudoVMV1R_V $v11
; CHECK: $v8 = PseudoVMV1R_V $v12
; CHECK: $v9 = PseudoVMV1R_V $v13
; CHECK: $v10 = PseudoVMV1R_V $v14
; CHECK: $v18 = PseudoVMV1R_V $v14
; CHECK: $v17 = PseudoVMV1R_V $v13
; CHECK: $v16 = PseudoVMV1R_V $v12
; CHECK: $v15 = PseudoVMV1R_V $v11
; CHECK: $v14 = PseudoVMV1R_V $v10
; CHECK: $v15 = PseudoVMV1R_V $v10
; CHECK: $v16 = PseudoVMV1R_V $v11
; CHECK: $v17 = PseudoVMV1R_V $v12
; CHECK: $v18 = PseudoVMV1R_V $v13
; CHECK: $v19 = PseudoVMV1R_V $v14
$v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14
$v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14
$v14_v15_v16_v17_v18 = COPY $v10_v11_v12_v13_v14
$v15_v16_v17_v18_v19 = COPY $v10_v11_v12_v13_v14
...
---
name: copy_zvlsseg_N6
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N6
; CHECK: $v4 = PseudoVMV1R_V $v10
; CHECK: $v5 = PseudoVMV1R_V $v11
; CHECK: $v6 = PseudoVMV1R_V $v12
; CHECK: $v7 = PseudoVMV1R_V $v13
; CHECK: $v8 = PseudoVMV1R_V $v14
; CHECK: $v9 = PseudoVMV1R_V $v15
; CHECK: $v5 = PseudoVMV1R_V $v10
; CHECK: $v6 = PseudoVMV1R_V $v11
; CHECK: $v7 = PseudoVMV1R_V $v12
; CHECK: $v8 = PseudoVMV1R_V $v13
; CHECK: $v9 = PseudoVMV1R_V $v14
; CHECK: $v10 = PseudoVMV1R_V $v15
; CHECK: $v20 = PseudoVMV1R_V $v15
; CHECK: $v19 = PseudoVMV1R_V $v14
; CHECK: $v18 = PseudoVMV1R_V $v13
; CHECK: $v17 = PseudoVMV1R_V $v12
; CHECK: $v16 = PseudoVMV1R_V $v11
; CHECK: $v15 = PseudoVMV1R_V $v10
; CHECK: $v16 = PseudoVMV1R_V $v10
; CHECK: $v17 = PseudoVMV1R_V $v11
; CHECK: $v18 = PseudoVMV1R_V $v12
; CHECK: $v19 = PseudoVMV1R_V $v13
; CHECK: $v20 = PseudoVMV1R_V $v14
; CHECK: $v21 = PseudoVMV1R_V $v15
$v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15
$v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15
$v15_v16_v17_v18_v19_v20 = COPY $v10_v11_v12_v13_v14_v15
$v16_v17_v18_v19_v20_v21 = COPY $v10_v11_v12_v13_v14_v15
...
---
name: copy_zvlsseg_N7
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N7
; CHECK: $v3 = PseudoVMV1R_V $v10
; CHECK: $v4 = PseudoVMV1R_V $v11
; CHECK: $v5 = PseudoVMV1R_V $v12
; CHECK: $v6 = PseudoVMV1R_V $v13
; CHECK: $v7 = PseudoVMV1R_V $v14
; CHECK: $v8 = PseudoVMV1R_V $v15
; CHECK: $v9 = PseudoVMV1R_V $v16
; CHECK: $v4 = PseudoVMV1R_V $v10
; CHECK: $v5 = PseudoVMV1R_V $v11
; CHECK: $v6 = PseudoVMV1R_V $v12
; CHECK: $v7 = PseudoVMV1R_V $v13
; CHECK: $v8 = PseudoVMV1R_V $v14
; CHECK: $v9 = PseudoVMV1R_V $v15
; CHECK: $v10 = PseudoVMV1R_V $v16
; CHECK: $v22 = PseudoVMV1R_V $v16
; CHECK: $v21 = PseudoVMV1R_V $v15
; CHECK: $v20 = PseudoVMV1R_V $v14
; CHECK: $v19 = PseudoVMV1R_V $v13
; CHECK: $v18 = PseudoVMV1R_V $v12
; CHECK: $v17 = PseudoVMV1R_V $v11
; CHECK: $v16 = PseudoVMV1R_V $v10
; CHECK: $v17 = PseudoVMV1R_V $v10
; CHECK: $v18 = PseudoVMV1R_V $v11
; CHECK: $v19 = PseudoVMV1R_V $v12
; CHECK: $v20 = PseudoVMV1R_V $v13
; CHECK: $v21 = PseudoVMV1R_V $v14
; CHECK: $v22 = PseudoVMV1R_V $v15
; CHECK: $v23 = PseudoVMV1R_V $v16
$v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16
$v4_v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15_v16
$v16_v17_v18_v19_v20_v21_v22 = COPY $v10_v11_v12_v13_v14_v15_v16
$v17_v18_v19_v20_v21_v22_v23 = COPY $v10_v11_v12_v13_v14_v15_v16
...
---
name: copy_zvlsseg_N8
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N8
; CHECK: $v2 = PseudoVMV1R_V $v10
; CHECK: $v3 = PseudoVMV1R_V $v11
; CHECK: $v4 = PseudoVMV1R_V $v12
; CHECK: $v5 = PseudoVMV1R_V $v13
; CHECK: $v6 = PseudoVMV1R_V $v14
; CHECK: $v7 = PseudoVMV1R_V $v15
; CHECK: $v8 = PseudoVMV1R_V $v16
; CHECK: $v9 = PseudoVMV1R_V $v17
; CHECK: $v3 = PseudoVMV1R_V $v10
; CHECK: $v4 = PseudoVMV1R_V $v11
; CHECK: $v5 = PseudoVMV1R_V $v12
; CHECK: $v6 = PseudoVMV1R_V $v13
; CHECK: $v7 = PseudoVMV1R_V $v14
; CHECK: $v8 = PseudoVMV1R_V $v15
; CHECK: $v9 = PseudoVMV1R_V $v16
; CHECK: $v10 = PseudoVMV1R_V $v17
; CHECK: $v24 = PseudoVMV1R_V $v17
; CHECK: $v23 = PseudoVMV1R_V $v16
; CHECK: $v22 = PseudoVMV1R_V $v15
; CHECK: $v21 = PseudoVMV1R_V $v14
; CHECK: $v20 = PseudoVMV1R_V $v13
; CHECK: $v19 = PseudoVMV1R_V $v12
; CHECK: $v18 = PseudoVMV1R_V $v11
; CHECK: $v17 = PseudoVMV1R_V $v10
; CHECK: $v18 = PseudoVMV1R_V $v10
; CHECK: $v19 = PseudoVMV1R_V $v11
; CHECK: $v20 = PseudoVMV1R_V $v12
; CHECK: $v21 = PseudoVMV1R_V $v13
; CHECK: $v22 = PseudoVMV1R_V $v14
; CHECK: $v23 = PseudoVMV1R_V $v15
; CHECK: $v24 = PseudoVMV1R_V $v16
; CHECK: $v25 = PseudoVMV1R_V $v17
$v2_v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16_v17
$v3_v4_v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15_v16_v17
$v17_v18_v19_v20_v21_v22_v23_v24 = COPY $v10_v11_v12_v13_v14_v15_v16_v17
$v18_v19_v20_v21_v22_v23_v24_v25 = COPY $v10_v11_v12_v13_v14_v15_v16_v17
...