[RISCV] Look through copies when trying to find an implicit def in addVSetVL.

The InstrEmitter can sometimes insert a COPY after an IMPLICIT_DEF before connecting it to the vector instruction. This occurs when constrainRegClass would have to reduce the register to a class with fewer than 4 registers. I believe LMUL8 masked instructions trigger this, since their result can only use the v8, v16, or v24 register group while the mask occupies v0.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D98567
commit 229eeb187d
parent 61ca706461
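For readers skimming the message before the diff below: the fix boils down to walking backwards through full COPYs until a real defining instruction is found, then checking whether that instruction is an IMPLICIT_DEF. The following is a minimal, commented sketch of that walk; it mirrors the elideCopies helper added in the patch, and the name lookThroughFullCopies is illustrative, not part of the patch.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Follow a chain of full COPYs back to the instruction that actually defines
// the value. Give up (return nullptr) if the chain leaves SSA virtual
// registers, e.g. when a copy reads a physical register.
static MachineInstr *lookThroughFullCopies(MachineInstr *MI,
                                           const MachineRegisterInfo &MRI) {
  while (MI && MI->isFullCopy()) {
    Register Src = MI->getOperand(1).getReg();
    if (!Register::isVirtualRegister(Src))
      return nullptr; // physical source: no single vreg def to follow
    MI = MRI.getVRegDef(Src);
  }
  return MI;
}

addVSetVL then treats a result that traces back to an IMPLICIT_DEF as having no live prior value, so it can keep the tail-agnostic policy (see the second hunk below).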
@@ -4845,6 +4845,19 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
   return TailMBB;
 }
 
+static MachineInstr *elideCopies(MachineInstr *MI,
+                                 const MachineRegisterInfo &MRI) {
+  while (true) {
+    if (!MI->isFullCopy())
+      return MI;
+    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
+      return nullptr;
+    MI = MRI.getVRegDef(MI->getOperand(1).getReg());
+    if (!MI)
+      return nullptr;
+  }
+}
+
 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
                                     int VLIndex, unsigned SEWIndex,
                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
@@ -4905,9 +4918,12 @@ static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
-    if (UseMI && UseMI->isImplicitDef())
-      TailAgnostic = true;
+    if (UseMI) {
+      UseMI = elideCopies(UseMI, MRI);
+      if (UseMI && UseMI->isImplicitDef())
+        TailAgnostic = true;
+    }
   }
 
   // For simplicity we reuse the vtype representation here.
   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
@@ -126,7 +126,7 @@ declare <vscale x 8 x float> @llvm.masked.load.nxv8f32(<vscale x 8 x float>*, i3
 define <vscale x 8 x double> @masked_load_nxv8f64(<vscale x 8 x double>* %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
 ; CHECK-NEXT: vle64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
   %load = call <vscale x 8 x double> @llvm.masked.load.nxv8f64(<vscale x 8 x double>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x double> undef)

@@ -148,7 +148,7 @@ declare <vscale x 16 x half> @llvm.masked.load.nxv16f16(<vscale x 16 x half>*, i
 define <vscale x 16 x float> @masked_load_nxv16f32(<vscale x 16 x float>* %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu
+; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
   %load = call <vscale x 16 x float> @llvm.masked.load.nxv16f32(<vscale x 16 x float>* %a, i32 4, <vscale x 16 x i1> %mask, <vscale x 16 x float> undef)

@@ -159,7 +159,7 @@ declare <vscale x 16 x float> @llvm.masked.load.nxv16f32(<vscale x 16 x float>*,
 define <vscale x 32 x half> @masked_load_nxv32f16(<vscale x 32 x half>* %a, <vscale x 32 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv32f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu
+; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
 ; CHECK-NEXT: vle16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
   %load = call <vscale x 32 x half> @llvm.masked.load.nxv32f16(<vscale x 32 x half>* %a, i32 2, <vscale x 32 x i1> %mask, <vscale x 32 x half> undef)

@@ -170,7 +170,7 @@ declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(<vscale x 8 x i32>*, i32, <
 define <vscale x 8 x i64> @masked_load_nxv8i64(<vscale x 8 x i64>* %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
 ; CHECK-NEXT: vle64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
   %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(<vscale x 8 x i64>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)

@@ -203,7 +203,7 @@ declare <vscale x 16 x i16> @llvm.masked.load.nxv16i16(<vscale x 16 x i16>*, i32
 define <vscale x 16 x i32> @masked_load_nxv16i32(<vscale x 16 x i32>* %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu
+; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
   %load = call <vscale x 16 x i32> @llvm.masked.load.nxv16i32(<vscale x 16 x i32>* %a, i32 4, <vscale x 16 x i1> %mask, <vscale x 16 x i32> undef)

@@ -225,7 +225,7 @@ declare <vscale x 32 x i8> @llvm.masked.load.nxv32i8(<vscale x 32 x i8>*, i32, <
 define <vscale x 32 x i16> @masked_load_nxv32i16(<vscale x 32 x i16>* %a, <vscale x 32 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv32i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu
+; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
 ; CHECK-NEXT: vle16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
   %load = call <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16>* %a, i32 2, <vscale x 32 x i1> %mask, <vscale x 32 x i16> undef)

@@ -236,7 +236,7 @@ declare <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16>*, i32
 define <vscale x 64 x i8> @masked_load_nxv64i8(<vscale x 64 x i8>* %a, <vscale x 64 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv64i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8,m8,tu,mu
+; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
 ; CHECK-NEXT: vle8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
   %load = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8(<vscale x 64 x i8>* %a, i32 1, <vscale x 64 x i1> %mask, <vscale x 64 x i8> undef)
@@ -0,0 +1,68 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s

# This test makes sure we peek through the COPY instruction between the
# IMPLICIT_DEF and PseudoVLE64_V_M8_MASK in order to select the tail-agnostic
# policy. The test is working if the second argument to PseudoVSETVLI has bit 6
# set.

--- |
  ; ModuleID = 'test.ll'
  source_filename = "test.ll"
  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
  target triple = "riscv64"

  ; Function Attrs: nounwind
  define <vscale x 8 x i64> @masked_load_nxv8i64(<vscale x 8 x i64>* %a, <vscale x 8 x i1> %mask) #0 {
    %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64.p0nxv8i64(<vscale x 8 x i64>* %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
    ret <vscale x 8 x i64> %load
  }

  ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
  declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64.p0nxv8i64(<vscale x 8 x i64>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i64>) #1

  attributes #0 = { nounwind "target-features"="+experimental-v" }
  attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+experimental-v" }

...
---
name: masked_load_nxv8i64
alignment: 4
tracksRegLiveness: true
registers:
  - { id: 0, class: gpr }
  - { id: 1, class: vr }
  - { id: 2, class: vrm8nov0 }
  - { id: 3, class: vrm8 }
  - { id: 4, class: vrm8nov0 }
liveins:
  - { reg: '$x10', virtual-reg: '%0' }
  - { reg: '$v0', virtual-reg: '%1' }
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  bb.0 (%ir-block.0):
    liveins: $x10, $v0

    ; CHECK-LABEL: name: masked_load_nxv8i64
    ; CHECK: liveins: $x10, $v0
    ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v0
    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10
    ; CHECK: $v0 = COPY [[COPY]]
    ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
    ; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]]
    ; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype
    ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
    ; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]]
    ; CHECK: PseudoRET implicit $v8m8
    %1:vr = COPY $v0
    %0:gpr = COPY $x10
    $v0 = COPY %1
    %3:vrm8 = IMPLICIT_DEF
    %4:vrm8nov0 = COPY %3
    %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
    $v8m8 = COPY %2
    PseudoRET implicit $v8m8

...
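As a quick sanity check on the "bit 6 set" condition in the test comment above, the sketch below decodes the PseudoVSETVLI immediate 91 under the vtype layout assumed here (vlmul in bits 2:0, vsew in bits 5:3, tail-agnostic in bit 6, mask-agnostic in bit 7); this layout and the variable names are stated as assumptions for illustration, not taken from the patch.

#include <cassert>
#include <cstdio>

int main() {
  // Immediate taken from the CHECK line: dead %5:gpr = PseudoVSETVLI $x0, 91, ...
  const unsigned VType = 91;
  unsigned VLMul = VType & 0x7;           // 3 -> LMUL = 2^3 = 8 (m8)
  unsigned VSEW = (VType >> 3) & 0x7;     // 3 -> SEW = 8 << 3 = 64 bits (e64)
  bool TailAgnostic = (VType >> 6) & 0x1; // bit 6: ta
  bool MaskAgnostic = (VType >> 7) & 0x1; // bit 7: ma
  std::printf("LMUL=m%u SEW=e%u ta=%d ma=%d\n", 1u << VLMul, 8u << VSEW,
              (int)TailAgnostic, (int)MaskAgnostic);
  assert(TailAgnostic && "bit 6 (tail agnostic) should be set for 91");
  return 0;
}

Decoding 91 this way gives e64, m8, tail agnostic, which matches the e64,m8,ta,mu vsetvli expected in the updated .ll tests above.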