[RISCV] Look through copies when trying to find an implicit def in addVSetVL.

The InstrEmitter can sometimes insert a copy after an IMPLICIT_DEF
before connecting it to the vector instruction. This occurs when
constrainRegClass reduces to a class with less than 4 registers.
I believe LMUL8 on masked instructions triggers this since the
result can only use the v8, v16, or v24 register group as the mask
is using v0.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D98567
This commit is contained in:
Craig Topper 2021-03-16 07:49:24 -07:00
parent 61ca706461
commit 229eeb187d
4 changed files with 93 additions and 9 deletions

View File

@ -4845,6 +4845,19 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
return TailMBB;
}
static MachineInstr *elideCopies(MachineInstr *MI,
const MachineRegisterInfo &MRI) {
while (true) {
if (!MI->isFullCopy())
return MI;
if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
return nullptr;
MI = MRI.getVRegDef(MI->getOperand(1).getReg());
if (!MI)
return nullptr;
}
}
static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
int VLIndex, unsigned SEWIndex,
RISCVVLMUL VLMul, bool ForceTailAgnostic) {
@ -4905,8 +4918,11 @@ static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
// If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
if (UseMI && UseMI->isImplicitDef())
TailAgnostic = true;
if (UseMI) {
UseMI = elideCopies(UseMI, MRI);
if (UseMI && UseMI->isImplicitDef())
TailAgnostic = true;
}
}
// For simplicity we reuse the vtype representation here.

View File

@ -126,7 +126,7 @@ declare <vscale x 8 x float> @llvm.masked.load.nxv8f32(<vscale x 8 x float>*, i3
define <vscale x 8 x double> @masked_load_nxv8f64(<vscale x 8 x double>* %a, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 8 x double> @llvm.masked.load.nxv8f64(<vscale x 8 x double>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x double> undef)
@ -148,7 +148,7 @@ declare <vscale x 16 x half> @llvm.masked.load.nxv16f16(<vscale x 16 x half>*, i
define <vscale x 16 x float> @masked_load_nxv16f32(<vscale x 16 x float>* %a, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 16 x float> @llvm.masked.load.nxv16f32(<vscale x 16 x float>* %a, i32 4, <vscale x 16 x i1> %mask, <vscale x 16 x float> undef)
@ -159,7 +159,7 @@ declare <vscale x 16 x float> @llvm.masked.load.nxv16f32(<vscale x 16 x float>*,
define <vscale x 32 x half> @masked_load_nxv32f16(<vscale x 32 x half>* %a, <vscale x 32 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
; CHECK-NEXT: vle16.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 32 x half> @llvm.masked.load.nxv32f16(<vscale x 32 x half>* %a, i32 2, <vscale x 32 x i1> %mask, <vscale x 32 x half> undef)

View File

@ -170,7 +170,7 @@ declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(<vscale x 8 x i32>*, i32, <
define <vscale x 8 x i64> @masked_load_nxv8i64(<vscale x 8 x i64>* %a, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(<vscale x 8 x i64>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
@ -203,7 +203,7 @@ declare <vscale x 16 x i16> @llvm.masked.load.nxv16i16(<vscale x 16 x i16>*, i32
define <vscale x 16 x i32> @masked_load_nxv16i32(<vscale x 16 x i32>* %a, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 16 x i32> @llvm.masked.load.nxv16i32(<vscale x 16 x i32>* %a, i32 4, <vscale x 16 x i1> %mask, <vscale x 16 x i32> undef)
@ -225,7 +225,7 @@ declare <vscale x 32 x i8> @llvm.masked.load.nxv32i8(<vscale x 32 x i8>*, i32, <
define <vscale x 32 x i16> @masked_load_nxv32i16(<vscale x 32 x i16>* %a, <vscale x 32 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
; CHECK-NEXT: vle16.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16>* %a, i32 2, <vscale x 32 x i1> %mask, <vscale x 32 x i16> undef)
@ -236,7 +236,7 @@ declare <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16>*, i32
define <vscale x 64 x i8> @masked_load_nxv64i8(<vscale x 64 x i8>* %a, <vscale x 64 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv64i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8,m8,tu,mu
; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
; CHECK-NEXT: vle8.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8(<vscale x 64 x i8>* %a, i32 1, <vscale x 64 x i1> %mask, <vscale x 64 x i8> undef)

View File

@ -0,0 +1,68 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -mtriple=riscv64 -mattr=experimental-v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s
# This test makes sure we peak through the COPY instruction between the
# IMPLICIT_DEF and PseudoVLE64_V_M8_MASK in order to select the tail agnostic
# policy. The test is working if the second argument to PseudoVSETVLI has bit 6
# set.
--- |
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
; Function Attrs: nounwind
define <vscale x 8 x i64> @masked_load_nxv8i64(<vscale x 8 x i64>* %a, <vscale x 8 x i1> %mask) #0 {
%load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64.p0nxv8i64(<vscale x 8 x i64>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
ret <vscale x 8 x i64> %load
}
; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64.p0nxv8i64(<vscale x 8 x i64>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i64>) #1
attributes #0 = { nounwind "target-features"="+experimental-v" }
attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+experimental-v" }
...
---
name: masked_load_nxv8i64
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: vr }
- { id: 2, class: vrm8nov0 }
- { id: 3, class: vrm8 }
- { id: 4, class: vrm8nov0 }
liveins:
- { reg: '$x10', virtual-reg: '%0' }
- { reg: '$v0', virtual-reg: '%1' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $x10, $v0
; CHECK-LABEL: name: masked_load_nxv8i64
; CHECK: liveins: $x10, $v0
; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v0
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10
; CHECK: $v0 = COPY [[COPY]]
; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]]
; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]]
; CHECK: PseudoRET implicit $v8m8
%1:vr = COPY $v0
%0:gpr = COPY $x10
$v0 = COPY %1
%3:vrm8 = IMPLICIT_DEF
%4:vrm8nov0 = COPY %3
%2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
$v8m8 = COPY %2
PseudoRET implicit $v8m8
...