Recommit "[RISCV] Enable subregister liveness tracking for RVV."

The failure that caused the previous revert has been fixed
by https://reviews.llvm.org/D126048

Original commit message:

RVV makes heavy use of subregisters due to LMUL>1 and segment
load/store tuples. Enabling subregister liveness tracking improves the quality
of the register allocation.

I've added a command-line option that can be used to turn it off if it causes
compile-time or functional issues. I used that option to keep the old behavior
for one interesting test case that was testing register allocation.

Reviewed By: kito-cheng

Differential Revision: https://reviews.llvm.org/D128016
This commit is contained in:
Craig Topper 2022-06-20 20:20:00 -07:00
parent 163c77b2e0
commit 59cde2133d
38 changed files with 20236 additions and 24652 deletions

View File

@ -201,6 +201,9 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
}
bool RISCVSubtarget::enableSubRegLiveness() const {
  // Returns whether the register allocator should track subregister liveness
  // for this subtarget.
  //
  // Honor an explicit command-line setting first: -riscv-enable-subreg-liveness
  // exists so users can turn tracking off if it causes compile-time or
  // functional issues. getNumOccurrences() > 0 means the option was passed
  // explicitly, so its value (either way) overrides the default below.
  if (EnableSubRegLiveness.getNumOccurrences())
    return EnableSubRegLiveness;
  // By default, enable subregister liveness when vector instructions are
  // available: RVV makes heavy use of subregisters due to LMUL>1 register
  // groups and segment load/store tuples, and tracking improves register
  // allocation quality for them.
  return hasVInstructions();
}

View File

@ -5,7 +5,6 @@
define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: extract_nxv8i32_nxv4i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
ret <vscale x 4 x i32> %c
@ -23,7 +22,6 @@ define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec) {
define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: extract_nxv8i32_nxv2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
ret <vscale x 2 x i32> %c
@ -59,7 +57,6 @@ define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec) {
define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv8i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 8 x i32> %c
@ -77,7 +74,6 @@ define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec)
define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv4i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 4 x i32> %c
@ -113,7 +109,6 @@ define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec)
define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 2 x i32> %c
@ -185,7 +180,6 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec)
define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: extract_nxv16i32_nxv1i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
; CHECK-NEXT: ret
%c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
ret <vscale x 1 x i32> %c
@ -247,7 +241,6 @@ define <vscale x 1 x i32> @extract_nxv2i32_nxv1i32_0(<vscale x 2 x i32> %vec) {
define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_0(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: extract_nxv32i8_nxv2i8_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 0)
ret <vscale x 2 x i8> %c
@ -344,7 +337,6 @@ define <vscale x 1 x i8> @extract_nxv4i8_nxv1i8_3(<vscale x 4 x i8> %vec) {
define <vscale x 2 x half> @extract_nxv2f16_nxv16f16_0(<vscale x 16 x half> %vec) {
; CHECK-LABEL: extract_nxv2f16_nxv16f16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
; CHECK-NEXT: ret
%c = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv16f16(<vscale x 16 x half> %vec, i64 0)
ret <vscale x 2 x half> %c
@ -467,7 +459,6 @@ define <vscale x 16 x i1> @extract_nxv16i1_nxv32i1_16(<vscale x 32 x i1> %x) {
define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_0(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 0)
ret <vscale x 6 x half> %res
@ -479,14 +470,13 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vslidedown.vx v14, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
; CHECK-NEXT: vslidedown.vx v11, v10, a0
; CHECK-NEXT: vslidedown.vx v8, v9, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
; CHECK-NEXT: vslideup.vi v13, v14, 0
; CHECK-NEXT: vslideup.vi v9, v11, 0
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
ret <vscale x 6 x half> %res

View File

@ -7,7 +7,6 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg2e8.v v7, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
; CHECK-NEXT: ret
%1 = bitcast <16 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* %1, i64 8)
@ -21,7 +20,6 @@ define <8 x i8> @load_factor3(<24 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg3e8.v v6, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <24 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8* %1, i64 8)
@ -36,7 +34,6 @@ define <8 x i8> @load_factor4(<32 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg4e8.v v5, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <32 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8* %1, i64 8)
@ -52,7 +49,6 @@ define <8 x i8> @load_factor5(<40 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg5e8.v v4, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <40 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8* %1, i64 8)
@ -69,7 +65,6 @@ define <8 x i8> @load_factor6(<48 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg6e8.v v3, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <48 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8* %1, i64 8)
@ -87,7 +82,6 @@ define <8 x i8> @load_factor7(<56 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg7e8.v v2, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v2_v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <56 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8* %1, i64 8)
@ -106,7 +100,6 @@ define <8 x i8> @load_factor8(<64 x i8>* %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vlseg8e8.v v1, (a0)
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v1_v2_v3_v4_v5_v6_v7_v8
; CHECK-NEXT: ret
%1 = bitcast <64 x i8>* %ptr to i8*
%2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8* %1, i64 8)

View File

@ -267,7 +267,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: mv a4, a3
; CHECK-NEXT: .LBB16_2:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; CHECK-NEXT: vslidedown.vi v3, v2, 4
; CHECK-NEXT: vslidedown.vi v28, v2, 4
; CHECK-NEXT: addi a6, a4, -32
; CHECK-NEXT: addi a3, a1, 640
; CHECK-NEXT: mv a5, a2
@ -276,7 +276,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: mv a5, a6
; CHECK-NEXT: .LBB16_4:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: vslidedown.vi v0, v3, 2
; CHECK-NEXT: vslidedown.vi v0, v28, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v16, (a3)
; CHECK-NEXT: addi t0, a5, -16
@ -301,7 +301,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: .LBB16_8:
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu
; CHECK-NEXT: li a5, 64
; CHECK-NEXT: vmv1r.v v0, v3
; CHECK-NEXT: vmv1r.v v0, v28
; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t
; CHECK-NEXT: csrr a6, vlenb
; CHECK-NEXT: li t0, 48
@ -314,7 +314,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: li a7, 64
; CHECK-NEXT: .LBB16_10:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; CHECK-NEXT: vslidedown.vi v3, v1, 4
; CHECK-NEXT: vslidedown.vi v28, v1, 4
; CHECK-NEXT: addi t0, a7, -32
; CHECK-NEXT: addi a5, a1, 128
; CHECK-NEXT: mv a6, a2
@ -323,7 +323,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: mv a6, t0
; CHECK-NEXT: .LBB16_12:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: vslidedown.vi v0, v3, 2
; CHECK-NEXT: vslidedown.vi v0, v28, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v16, (a5)
; CHECK-NEXT: addi a5, a6, -16
@ -347,7 +347,7 @@ define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i3
; CHECK-NEXT: .LBB16_16:
; CHECK-NEXT: addi t0, a1, 384
; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v3
; CHECK-NEXT: vmv1r.v v0, v28
; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t
; CHECK-NEXT: csrr a6, vlenb
; CHECK-NEXT: li t1, 40

View File

@ -39,22 +39,20 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V128-LABEL: interleave_v2f64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: vmv1r.v v12, v9
; RV32-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-V128-NEXT: vid.v v10
; RV32-V128-NEXT: vsrl.vi v14, v10, 1
; RV32-V128-NEXT: vid.v v9
; RV32-V128-NEXT: vsrl.vi v9, v9, 1
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-V128-NEXT: li a0, 10
; RV32-V128-NEXT: vmv.s.x v0, a0
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v9
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v9, v0.t
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: interleave_v2f64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: vmv1r.v v12, v9
; RV64-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-V128-NEXT: vid.v v10
; RV64-V128-NEXT: vsrl.vi v14, v10, 1
@ -269,9 +267,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV32-V128-NEXT: vle32.v v0, (a0)
; RV32-V128-NEXT: vmv8r.v v24, v8
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
; RV32-V128-NEXT: vle32.v v24, (a0)
@ -319,9 +317,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV64-V128-NEXT: vle32.v v0, (a0)
; RV64-V128-NEXT: vmv8r.v v24, v8
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
; RV64-V128-NEXT: vle32.v v24, (a0)

View File

@ -92,16 +92,16 @@ define void @sext_v32i8_v32i32(<32 x i8>* %x, <32 x i32>* %z) {
; LMULMAX2-NEXT: vsetivli zero, 16, e8, m2, ta, mu
; LMULMAX2-NEXT: vslidedown.vi v10, v8, 16
; LMULMAX2-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; LMULMAX2-NEXT: vslidedown.vi v14, v10, 8
; LMULMAX2-NEXT: vslidedown.vi v9, v10, 8
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vsext.vf4 v16, v14
; LMULMAX2-NEXT: vsext.vf4 v14, v8
; LMULMAX2-NEXT: vsext.vf4 v14, v9
; LMULMAX2-NEXT: vsext.vf4 v16, v8
; LMULMAX2-NEXT: vsext.vf4 v8, v10
; LMULMAX2-NEXT: addi a0, a1, 64
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: vse32.v v14, (a1)
; LMULMAX2-NEXT: vse32.v v16, (a1)
; LMULMAX2-NEXT: addi a0, a1, 96
; LMULMAX2-NEXT: vse32.v v16, (a0)
; LMULMAX2-NEXT: vse32.v v14, (a0)
; LMULMAX2-NEXT: addi a0, a1, 32
; LMULMAX2-NEXT: vse32.v v12, (a0)
; LMULMAX2-NEXT: ret

View File

@ -52,22 +52,20 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V128-LABEL: interleave_v2i64:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: vmv1r.v v12, v9
; RV32-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-V128-NEXT: vid.v v10
; RV32-V128-NEXT: vsrl.vi v14, v10, 1
; RV32-V128-NEXT: vid.v v9
; RV32-V128-NEXT: vsrl.vi v9, v9, 1
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-V128-NEXT: li a0, 10
; RV32-V128-NEXT: vmv.s.x v0, a0
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v9
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v9, v0.t
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
;
; RV64-V128-LABEL: interleave_v2i64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: vmv1r.v v12, v9
; RV64-V128-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-V128-NEXT: vid.v v10
; RV64-V128-NEXT: vsrl.vi v14, v10, 1
@ -375,9 +373,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV32-V128-NEXT: vle32.v v0, (a0)
; RV32-V128-NEXT: vmv8r.v v24, v8
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: vrgather.vv v8, v24, v0
; RV32-V128-NEXT: addi a0, sp, 16
; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-V128-NEXT: lui a0, %hi(.LCPI15_1)
; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI15_1)
; RV32-V128-NEXT: vle32.v v24, (a0)
@ -425,9 +423,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV64-V128-NEXT: vle32.v v0, (a0)
; RV64-V128-NEXT: vmv8r.v v24, v8
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: vrgather.vv v8, v24, v0
; RV64-V128-NEXT: addi a0, sp, 16
; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV64-V128-NEXT: lui a0, %hi(.LCPI15_1)
; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI15_1)
; RV64-V128-NEXT: vle32.v v24, (a0)

View File

@ -12819,8 +12819,8 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: beqz a2, .LBB98_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
@ -12832,25 +12832,25 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 4
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
; RV64ZVE32F-NEXT: .LBB98_25: # %else41
; RV64ZVE32F-NEXT: lui a2, 8
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_27
; RV64ZVE32F-NEXT: # %bb.26: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
@ -12878,8 +12878,8 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: beqz a2, .LBB98_31
; RV64ZVE32F-NEXT: # %bb.30: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
@ -12891,10 +12891,10 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 64
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_33
; RV64ZVE32F-NEXT: # %bb.32: # %cond.load52
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
@ -12906,55 +12906,55 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 128
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB98_35
; RV64ZVE32F-NEXT: # %bb.34: # %cond.load55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
; RV64ZVE32F-NEXT: .LBB98_35: # %else56
; RV64ZVE32F-NEXT: lui a2, 256
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_37
; RV64ZVE32F-NEXT: # %bb.36: # %cond.load58
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20
; RV64ZVE32F-NEXT: .LBB98_37: # %else59
; RV64ZVE32F-NEXT: lui a2, 512
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_39
; RV64ZVE32F-NEXT: # %bb.38: # %cond.load61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 21
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21
; RV64ZVE32F-NEXT: .LBB98_39: # %else62
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1024
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.load64
; RV64ZVE32F-NEXT: vmv.x.s a2, v9

View File

@ -11157,8 +11157,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: beqz a2, .LBB92_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 13
@ -11167,22 +11167,22 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 4
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_25: # %else28
; RV64ZVE32F-NEXT: lui a2, 8
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_27
; RV64ZVE32F-NEXT: # %bb.26: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
@ -11204,8 +11204,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: beqz a2, .LBB92_31
; RV64ZVE32F-NEXT: # %bb.30: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
@ -11214,10 +11214,10 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 64
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_33
; RV64ZVE32F-NEXT: # %bb.32: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
@ -11226,46 +11226,46 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 128
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB92_35
; RV64ZVE32F-NEXT: # %bb.34: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_35: # %else38
; RV64ZVE32F-NEXT: lui a2, 256
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_37
; RV64ZVE32F-NEXT: # %bb.36: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_37: # %else40
; RV64ZVE32F-NEXT: lui a2, 512
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_39
; RV64ZVE32F-NEXT: # %bb.38: # %cond.store41
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 21
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_39: # %else42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1024
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11

View File

@ -747,15 +747,15 @@ define float @vreduce_ord_fwadd_v64f32(<64 x half>* %x, float %s) {
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vslidedown.vx v8, v16, a0
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfwredosum.vs v16, v16, v24
; CHECK-NEXT: vfwredosum.vs v12, v16, v12
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, mu
; CHECK-NEXT: vfmv.f.s ft0, v16
; CHECK-NEXT: vfmv.f.s ft0, v12
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfwredosum.vs v8, v8, v16
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, mu
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@ -1195,15 +1195,15 @@ define double @vreduce_ord_fwadd_v32f64(<32 x float>* %x, double %s) {
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v16, 16
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vfwredosum.vs v16, v16, v24
; CHECK-NEXT: vfwredosum.vs v12, v16, v12
; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu
; CHECK-NEXT: vfmv.f.s ft0, v16
; CHECK-NEXT: vfmv.f.s ft0, v12
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vfwredosum.vs v8, v8, v16
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret

View File

@ -1596,11 +1596,11 @@ define i64 @vwreduce_add_v64i64(<64 x i32>* %x) {
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vle32.v v16, (a1)
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV32-NEXT: vslidedown.vi v24, v8, 16
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
@ -1664,11 +1664,11 @@ define i64 @vwreduce_add_v64i64(<64 x i32>* %x) {
; RV64-NEXT: li a2, 32
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, mu
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT: vle32.v v16, (a1)
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV64-NEXT: vslidedown.vi v24, v8, 16
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 24
; RV64-NEXT: mul a0, a0, a1
@ -1735,11 +1735,11 @@ define i64 @vwreduce_uadd_v64i64(<64 x i32>* %x) {
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vle32.v v16, (a1)
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV32-NEXT: vslidedown.vi v24, v8, 16
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
@ -1803,11 +1803,11 @@ define i64 @vwreduce_uadd_v64i64(<64 x i32>* %x) {
; RV64-NEXT: li a2, 32
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, mu
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT: vle32.v v16, (a1)
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV64-NEXT: vslidedown.vi v24, v8, 16
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 24
; RV64-NEXT: mul a0, a0, a1

View File

@ -367,7 +367,6 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve
define <vscale x 32 x half> @insert_nxv32f16_undef_nxv1f16_0(<vscale x 1 x half> %subvec) {
; CHECK-LABEL: insert_nxv32f16_undef_nxv1f16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m8
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.experimental.vector.insert.nxv1f16.nxv32f16(<vscale x 32 x half> undef, <vscale x 1 x half> %subvec, i64 0)
ret <vscale x 32 x half> %v
@ -381,8 +380,7 @@ define <vscale x 32 x half> @insert_nxv32f16_undef_nxv1f16_26(<vscale x 1 x half
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
; CHECK-NEXT: vslideup.vx v22, v8, a0
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: vslideup.vx v14, v8, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.experimental.vector.insert.nxv1f16.nxv32f16(<vscale x 32 x half> undef, <vscale x 1 x half> %subvec, i64 26)
ret <vscale x 32 x half> %v

View File

@ -46,7 +46,6 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
; SPILL-O2-NEXT: add sp, sp, a0
@ -102,7 +101,6 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(i32* %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
; SPILL-O2-NEXT: add sp, sp, a0
@ -160,7 +158,6 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(i32* %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 2
; SPILL-O2-NEXT: add sp, sp, a0
@ -218,7 +215,6 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(i32* %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 3
; SPILL-O2-NEXT: add sp, sp, a0
@ -281,7 +277,6 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(i32* %base, i32 %vl) nounwind
; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: li a1, 6
; SPILL-O2-NEXT: mul a0, a0, a1

View File

@ -46,7 +46,6 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
; SPILL-O2-NEXT: add sp, sp, a0
@ -102,7 +101,6 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(i32* %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
; SPILL-O2-NEXT: add sp, sp, a0
@ -160,7 +158,6 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(i32* %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 2
; SPILL-O2-NEXT: add sp, sp, a0
@ -218,7 +215,6 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(i32* %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 3
; SPILL-O2-NEXT: add sp, sp, a0
@ -281,7 +277,6 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(i32* %base, i64 %vl) nounwind
; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: li a1, 6
; SPILL-O2-NEXT: mul a0, a0, a1

View File

@ -94,14 +94,7 @@ declare <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x
define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a4, a1, 3
@ -113,20 +106,15 @@ define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double>
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu
; CHECK-NEXT: vfncvt.f.f.w v12, v16, v0.t
; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB7_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB7_4:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 %vl)
ret <vscale x 16 x float> %v

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,6 @@ entry:
define void @test_vlseg2ff_mask_dead_value(<vscale x 16 x i16> %val, i16* %base, i32 %vl, <vscale x 16 x i1> %mask, i32* %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_dead_value:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v8, (a0), v0.t
@ -42,7 +41,6 @@ define <vscale x 16 x i16> @test_vlseg2ff_dead_vl(i16* %base, i32 %vl) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i32} @llvm.riscv.vlseg2ff.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, i16* %base, i32 %vl)
@ -56,7 +54,6 @@ define <vscale x 16 x i16> @test_vlseg2ff_mask_dead_vl(<vscale x 16 x i16> %val,
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i32} @llvm.riscv.vlseg2ff.mask.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1)
@ -78,7 +75,6 @@ entry:
define void @test_vlseg2ff_mask_dead_all(<vscale x 16 x i16> %val, i16* %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2ff_mask_dead_all:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v8, (a0), v0.t

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,6 @@ entry:
define void @test_vlseg2ff_mask_dead_value(<vscale x 16 x i16> %val, i16* %base, i64 %vl, <vscale x 16 x i1> %mask, i64* %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_dead_value:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v8, (a0), v0.t
@ -42,7 +41,6 @@ define <vscale x 16 x i16> @test_vlseg2ff_dead_vl(i16* %base, i64 %vl) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, i16* %base, i64 %vl)
@ -56,7 +54,6 @@ define <vscale x 16 x i16> @test_vlseg2ff_mask_dead_vl(<vscale x 16 x i16> %val,
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.mask.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@ -78,7 +75,6 @@ entry:
define void @test_vlseg2ff_mask_dead_all(<vscale x 16 x i16> %val, i16* %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2ff_mask_dead_all:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v8, (a0), v0.t

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -269,19 +269,18 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8
; RV32-NEXT: mv a3, a4
; RV32-NEXT: .LBB12_2:
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf4 v24, v10
; RV32-NEXT: vsext.vf4 v16, v10
; RV32-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV32-NEXT: vluxei32.v v18, (a0), v24, v0.t
; RV32-NEXT: vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT: bltu a1, a2, .LBB12_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB12_4:
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf4 v24, v8
; RV32-NEXT: vsext.vf4 v16, v8
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT: vmv4r.v v8, v16
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
@ -309,9 +308,9 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8
; RV64-NEXT: vsetvli t0, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v13, a6
; RV64-NEXT: vsetvli t0, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v11
; RV64-NEXT: vsext.vf8 v16, v11
; RV64-NEXT: vsetvli zero, a7, e8, m1, ta, mu
; RV64-NEXT: vluxei64.v v19, (a0), v24, v0.t
; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT: bltu a1, a5, .LBB12_6
; RV64-NEXT: # %bb.5:
; RV64-NEXT: mv a1, a5
@ -324,28 +323,27 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v12, a6
; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v9
; RV64-NEXT: vsext.vf8 v16, v9
; RV64-NEXT: vsetvli zero, a4, e8, m1, ta, mu
; RV64-NEXT: vluxei64.v v17, (a0), v24, v0.t
; RV64-NEXT: vluxei64.v v9, (a0), v16, v0.t
; RV64-NEXT: bltu a1, a3, .LBB12_10
; RV64-NEXT: # %bb.9:
; RV64-NEXT: mv a1, a3
; RV64-NEXT: .LBB12_10:
; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v8
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: bltu a2, a3, .LBB12_12
; RV64-NEXT: # %bb.11:
; RV64-NEXT: mv a2, a3
; RV64-NEXT: .LBB12_12:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v10
; RV64-NEXT: vsext.vf8 v16, v10
; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; RV64-NEXT: vmv1r.v v0, v13
; RV64-NEXT: vluxei64.v v18, (a0), v24, v0.t
; RV64-NEXT: vmv4r.v v8, v16
; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
%v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, <vscale x 32 x i1> %m, i32 %evl)

View File

@ -91,11 +91,13 @@ define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: # implicit-def: $x10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_2: # %if.else
; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: # implicit-def: $x10
; CHECK-NEXT: ret
entry:
%tobool = icmp eq i8 %cond, 0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,6 @@ define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16(i16* %base, <vscale x 16 x
; CHECK-NEXT: vlseg2e16.v v4, (a0)
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.nxv16i16(<vscale x 16 x i16> undef,<vscale x 16 x i16> undef, i16* %base, i64 0)
@ -35,7 +34,6 @@ define <vscale x 16 x i16> @test_vlsseg2_mask_nxv16i16(i16* %base, i64 %offset,
; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1, v0.t
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlsseg2.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, i16* %base, i64 %offset, i64 0)
@ -94,7 +92,6 @@ define <vscale x 16 x i16> @test_vlseg2ff_nxv16i16(i16* %base, i64* %outvl) {
; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a1)
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, i16* %base, i64 0)
@ -112,7 +109,6 @@ define <vscale x 16 x i16> @test_vlseg2ff_mask_nxv16i16(<vscale x 16 x i16> %val
; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a1)
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.mask.nxv16i16(<vscale x 16 x i16> %val,<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask, i64 0, i64 1)
@ -128,7 +124,6 @@ declare void @llvm.riscv.vsseg2.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x
define void @test_vsseg2_nxv16i16(<vscale x 16 x i16> %val, i16* %base) {
; CHECK-LABEL: test_vsseg2_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu
; CHECK-NEXT: vsseg2e16.v v8, (a0)
@ -141,7 +136,6 @@ entry:
define void @test_vsseg2_mask_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vsseg2_mask_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu
; CHECK-NEXT: vsseg2e16.v v8, (a0), v0.t
@ -157,7 +151,6 @@ declare void @llvm.riscv.vssseg2.mask.nxv16i16(<vscale x 16 x i16>,<vscale x 16
define void @test_vssseg2_nxv16i16(<vscale x 16 x i16> %val, i16* %base, i64 %offset) {
; CHECK-LABEL: test_vssseg2_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu
; CHECK-NEXT: vssseg2e16.v v8, (a0), a1
@ -170,7 +163,6 @@ entry:
define void @test_vssseg2_mask_nxv16i16(<vscale x 16 x i16> %val, i16* %base, i64 %offset, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vssseg2_mask_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu
; CHECK-NEXT: vssseg2e16.v v8, (a0), a1, v0.t
@ -186,7 +178,6 @@ declare void @llvm.riscv.vsoxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vs
define void @test_vsoxseg2_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index) {
; CHECK-LABEL: test_vsoxseg2_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu
@ -200,7 +191,6 @@ entry:
define void @test_vsoxseg2_mask_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vsoxseg2_mask_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu
@ -217,7 +207,6 @@ declare void @llvm.riscv.vsuxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vs
define void @test_vsuxseg2_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index) {
; CHECK-LABEL: test_vsuxseg2_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu
@ -231,7 +220,6 @@ entry:
define void @test_vsuxseg2_mask_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, i16* %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vsuxseg2_mask_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m4_v12m4 def $v8m4_v12m4
; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu