[RISCV] Don't propagate VL/VTYPE across inline assembly in the Insert VSETVLI pass.

It's conceivable someone could put a vsetvli in inline assembly, so it's safer to treat inline assembly as a barrier. The alternative would be to trust that the user marks the VL and VTYPE registers as clobbers of the inline assembly if they do that, but that seems error prone. I'm assuming inline assembly in vector code is going to be rare.

Reviewed By: frasercrmck, HsiangKai

Differential Revision: https://reviews.llvm.org/D103126
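As a hypothetical illustration of the scenario described above (not part of this change), consider RVV intrinsic code containing an inline-assembly statement that executes its own vsetvli. The function name and the use of <riscv_vector.h> intrinsics are assumptions for this sketch, which presumes a Clang/GCC toolchain with RVV support (e.g. -march=rv64gcv):

    #include <riscv_vector.h>

    /* Hypothetical example, not from this commit. */
    vint64m1_t add_twice(vint64m1_t a, vint64m1_t b, size_t avl) {
      /* The insert-vsetvli pass emits a vsetvli for this intrinsic. */
      vint64m1_t sum = __riscv_vadd_vv_i64m1(a, b, avl);

      /* The asm body changes VL and VTYPE, but the compiler only knows about
         the t0 clobber.  The alternative mentioned above would be to require
         the user to list VL/VTYPE clobbers here, which is easy to forget. */
      asm volatile("vsetvli t0, zero, e8, m8, ta, ma" ::: "t0");

      /* With inline asm treated as a barrier, the pass re-emits a vsetvli
         here instead of assuming the earlier VL/VTYPE setting still holds. */
      return __riscv_vadd_vv_i64m1(sum, b, avl);
    }

The MIR test added below exercises the same situation at the machine-IR level, using an empty "asm sideeffect" statement between two vector operations.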
This commit is contained in:
parent a45877eea8
commit b2c7ac874f
@@ -438,7 +438,7 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
 
     // If this is something that updates VL/VTYPE that we don't know about, set
     // the state to unknown.
-    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
       BBInfo.Change = VSETVLIInfo::getUnknown();
     }
@@ -540,7 +540,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
 
     // If this is something updates VL/VTYPE that we don't know about, set
     // the state to unknown.
-    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
      CurInfo = VSETVLIInfo::getUnknown();
     }
@@ -3,19 +3,17 @@
 # RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s
 
 --- |
-  ; ModuleID = 'test.ll'
-  source_filename = "test.ll"
+  ; ModuleID = 'vsetvli-insert.ll'
+  source_filename = "vsetvli-insert.ll"
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
   target triple = "riscv64"
 
-  ; Function Attrs: nounwind
   define <vscale x 1 x i64> @add(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2) #0 {
   entry:
     %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2)
     ret <vscale x 1 x i64> %a
   }
 
-  ; Function Attrs: nounwind
   define <vscale x 1 x i64> @load_add(<vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
   entry:
     %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
@@ -23,7 +21,6 @@
     ret <vscale x 1 x i64> %b
   }
 
-  ; Function Attrs: nounwind
   define <vscale x 1 x i64> @load_zext(<vscale x 1 x i32>* %0, i64 %1) #0 {
   entry:
     %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %1)
@@ -34,14 +31,13 @@
   ; Function Attrs: nounwind readnone
   declare i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64>) #1
 
-  ; Function Attrs: nounwind
   define i64 @vmv_x_s(<vscale x 1 x i64> %0) #0 {
   entry:
     %a = call i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64> %0)
     ret i64 %a
   }
 
-  define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) #2 {
+  define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) #0 {
     %a = load <2 x i64>, <2 x i64>* %x, align 16
     %b = load <2 x i64>, <2 x i64>* %y, align 16
     %c = add <2 x i64> %a, %b
@@ -50,42 +46,49 @@
   }
 
   ; Function Attrs: nofree nosync nounwind readnone willreturn
-  declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #3
+  declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #2
 
-  define i64 @vreduce_add_v2i64(<2 x i64>* %x) #2 {
+  define i64 @vreduce_add_v2i64(<2 x i64>* %x) #0 {
     %v = load <2 x i64>, <2 x i64>* %x, align 16
     %red = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v)
     ret i64 %red
   }
 
   ; Function Attrs: nounwind
-  declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #0
+  declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #3
 
-  ; Function Attrs: nounwind
   define <vscale x 1 x i64> @vsetvli_add(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %avl) #0 {
   entry:
-    %a = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 1)
+    %a = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0)
     %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %a)
     ret <vscale x 1 x i64> %b
   }
 
+  define <vscale x 1 x i64> @load_add_inlineasm(<vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
+  entry:
+    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
+    call void asm sideeffect "", ""()
+    %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
+    ret <vscale x 1 x i64> %b
+  }
+
   ; Function Attrs: nounwind readnone
   declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
 
   ; Function Attrs: nounwind readonly
-  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #2
+  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #4
 
   ; Function Attrs: nounwind readonly
-  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #2
+  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #4
 
   ; Function Attrs: nounwind readnone
   declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
 
-  attributes #0 = { nounwind "target-features"="+experimental-v" }
-  attributes #1 = { nounwind readnone "target-features"="+experimental-v" }
-  attributes #2 = { "target-features"="+experimental-v" }
-  attributes #3 = { nofree nosync nounwind readnone willreturn "target-features"="+experimental-v" }
-  attributes #4 = { nounwind readonly "target-features"="+experimental-v" }
+  attributes #0 = { "target-features"="+experimental-v" }
+  attributes #1 = { nounwind readnone }
+  attributes #2 = { nofree nosync nounwind readnone willreturn }
+  attributes #3 = { nounwind }
+  attributes #4 = { nounwind readonly }
 
 ...
 ---
@@ -352,3 +355,46 @@ body: |
     PseudoRET implicit $v8
 
 ...
+---
+name: load_add_inlineasm
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: vr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: vr }
+  - { id: 4, class: vr }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$v8', virtual-reg: '%1' }
+  - { reg: '$x11', virtual-reg: '%2' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    liveins: $x10, $v8, $x11
+
+    ; CHECK-LABEL: name: load_add_inlineasm
+    ; CHECK: liveins: $x10, $v8, $x11
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */
+    ; CHECK: dead %6:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]]
+    ; CHECK: PseudoRET implicit $v8
+    %2:gpr = COPY $x11
+    %1:vr = COPY $v8
+    %0:gpr = COPY $x10
+    %3:vr = PseudoVLE64_V_M1 %0, %2, 6
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    %4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6
+    $v8 = COPY %4
+    PseudoRET implicit $v8
+
+...