diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6ddad93bc2dd..cd47d65d50a8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1195,20 +1195,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   // Don't perform this optimization when optimizing for size, since
   // materializing elements and inserting them tends to cause code bloat.
   if (DominantValue && !DAG.shouldOptForSize()) {
-    unsigned Opc =
-        VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
-    SDValue Vec = DAG.getNode(Opc, DL, ContainerVT, DominantValue, VL);
+    SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);

     if (ValueCounts.size() != 1) {
       MVT XLenVT = Subtarget.getXLenVT();
       for (unsigned I = 0; I < NumElts; ++I) {
         if (!Op.getOperand(I).isUndef() && Op.getOperand(I) != DominantValue)
-          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Vec,
+          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec,
                             Op.getOperand(I), DAG.getConstant(I, DL, XLenVT));
       }
     }

-    return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+    return Vec;
   }

   return SDValue();
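The change above is what removes the redundant vsetvli/vsetivli pairs in the test updates that follow: instead of splatting the dominant element with the RISCV-specific VL nodes on the scalable container type and converting back to the fixed-length type, the lowering now builds a plain splat BUILD_VECTOR and the INSERT_VECTOR_ELTs directly on the fixed-length VT, which lets the later fixed-vector lowering keep a single vector configuration. A minimal sketch of the resulting pattern is below; it uses only the SelectionDAG calls that appear in the hunk, and the helper name and parameter list are illustrative rather than part of the patch.

// Illustrative sketch only: splat the most frequent element, then overwrite
// the few positions that differ. Everything stays on the fixed-length VT, so
// the subsequent INSERT_VECTOR_ELT lowering does not have to switch to a
// VLMAX configuration and back (the removed vsetvli/vsetivli lines below).
static SDValue buildDominantValueVector(SelectionDAG &DAG, const SDLoc &DL,
                                        MVT VT, MVT XLenVT,
                                        SDValue DominantValue,
                                        ArrayRef<SDValue> Elts) {
  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
  for (unsigned I = 0, E = Elts.size(); I != E; ++I)
    if (!Elts[I].isUndef() && Elts[I] != DominantValue)
      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, Elts[I],
                        DAG.getConstant(I, DL, XLenVT));
  return Vec;
}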
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 1b1a8e649adb..bb5dbda70eb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -670,9 +670,7 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT: or a1, a2, a1
 ; LMULMAX2-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vmv.v.x v25, a1
-; LMULMAX2-RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vmv.s.x v25, t1
-; LMULMAX2-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vse64.v v25, (a0)
 ; LMULMAX2-RV64-NEXT: ret
 ;
@@ -801,9 +799,7 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: or a1, a2, a1
 ; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1
-; LMULMAX1-RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v25, t1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
 ; LMULMAX1-RV64-NEXT: ret
   %a = load <2 x i64>, <2 x i64>* %x
@@ -2255,7 +2251,6 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: or a1, a2, a1
 ; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v26, t4
 ; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
 ; LMULMAX1-RV64-NEXT: srli a2, a1, 40
@@ -2305,9 +2300,7 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: or a1, a2, a1
 ; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1
-; LMULMAX1-RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v25, t4
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
 ; LMULMAX1-RV64-NEXT: vse64.v v26, (a6)
 ; LMULMAX1-RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index cb48a7a7b236..561b01828120 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -3878,9 +3878,7 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT: and a1, a1, a4
 ; LMULMAX2-RV64-NEXT: mul a1, a1, a5
 ; LMULMAX2-RV64-NEXT: srli a1, a1, 56
-; LMULMAX2-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX2-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
 ; LMULMAX2-RV64-NEXT: ret
 ;
@@ -4113,9 +4111,7 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: and a1, a1, a4
 ; LMULMAX1-RV64-NEXT: mul a1, a1, a5
 ; LMULMAX1-RV64-NEXT: srli a1, a1, 56
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vse64.v v26, (a0)
 ; LMULMAX1-RV64-NEXT: ret
   %a = load <2 x i64>, <2 x i64>* %x
@@ -11882,7 +11878,6 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: and a1, a1, a4
 ; LMULMAX1-RV64-NEXT: mul a1, a1, a5
 ; LMULMAX1-RV64-NEXT: srli a1, a1, 56
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
 ; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v25, 1
@@ -11940,9 +11935,7 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: and a1, a1, a4
 ; LMULMAX1-RV64-NEXT: mul a1, a1, a5
 ; LMULMAX1-RV64-NEXT: srli a1, a1, 56
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vse64.v v27, (a0)
 ; LMULMAX1-RV64-NEXT: vse64.v v26, (a6)
 ; LMULMAX1-RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index d6c3aba0be8b..e18c38e8ce30 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -2706,9 +2706,7 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT: and a1, a1, a4
 ; LMULMAX2-RV64-NEXT: mul a1, a1, a5
 ; LMULMAX2-RV64-NEXT: srli a1, a1, 56
-; LMULMAX2-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX2-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
 ; LMULMAX2-RV64-NEXT: ret
 ;
@@ -2889,9 +2887,7 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: and a1, a1, a4
 ; LMULMAX1-RV64-NEXT: mul a1, a1, a5
 ; LMULMAX1-RV64-NEXT: srli a1, a1, 56
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vse64.v v26, (a0)
 ; LMULMAX1-RV64-NEXT: ret
   %a = load <2 x i64>, <2 x i64>* %x
@@ -8230,7 +8226,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: and a2, a2, a5
 ; LMULMAX1-RV64-NEXT: mul a2, a2, a1
 ; LMULMAX1-RV64-NEXT: srli a2, a2, 56
-; LMULMAX1-RV64-NEXT: vsetvli a4, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2
 ; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 1
@@ -8268,9 +8263,7 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT: and a2, a2, a5
 ; LMULMAX1-RV64-NEXT: mul a1, a2, a1
 ; LMULMAX1-RV64-NEXT: srli a1, a1, 56
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vse64.v v26, (a0)
 ; LMULMAX1-RV64-NEXT: vse64.v v27, (a6)
 ; LMULMAX1-RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index b8085f0bc618..a48323916e1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -23,13 +23,12 @@ define void @buildvec_no_vid_v4f32(<4 x float>* %x) { define void @buildvec_dominant0_v4f32(<4 x float>* %x) { ; CHECK-LABEL: buildvec_dominant0_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: fmv.w.x ft0, zero ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flw ft1, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vfmv.s.f v25, ft0 +; CHECK-NEXT: flw ft0, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: fmv.w.x ft1, zero ; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; CHECK-NEXT: vfmv.v.f v26, ft1 +; CHECK-NEXT: vfmv.s.f v25, ft1 +; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a1, 3, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vi v26, v25, 2 ; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu @@ -43,9 +42,8 @@ define void @buildvec_dominant1_v4f32(<4 x float>* %x, float %f) { ; CHECK-LABEL: buildvec_dominant1_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.w.x ft0, zero -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vfmv.v.f v26, fa0 ; CHECK-NEXT: vsetivli a1, 2, e32,m1,tu,mu ; CHECK-NEXT: vslideup.vi v26, v25, 1 @@ -66,13 +64,12 @@ define void @buildvec_dominant2_v4f32(<4 x float>* %x, float %f) { ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; CHECK-NEXT: vfmv.v.f v25, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vfmv.s.f v26, ft0 +; CHECK-NEXT: vfmv.s.f v25, ft0 +; CHECK-NEXT: vfmv.v.f v26, fa0 ; CHECK-NEXT: vsetivli a1, 2, e32,m1,tu,mu -; CHECK-NEXT: vslideup.vi v25, v26, 1 +; CHECK-NEXT: vslideup.vi v26, v25, 1 ; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: vse32.v v26, (a0) ; CHECK-NEXT: ret %v0 = insertelement <4 x float> undef, float %f, i32 0 %v1 = insertelement <4 x float> %v0, float 2.0, i32 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 19b3ef6defff..43626ca3f5dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -53,7 +53,6 @@ define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) { ; RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu ; RV32-NEXT: lw a4, 16(a0) ; RV32-NEXT: vmv.v.x v26, a3 -; RV32-NEXT: vsetvli a3, zero, e32,m1,ta,mu ; RV32-NEXT: vmv.s.x v26, a4 ; RV32-NEXT: vsetivli a3, 4, e64,m2,tu,mu ; RV32-NEXT: vslideup.vi v28, v26, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index fecac9000096..7abea8116cbe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -69,9 +69,8 @@ define void @buildvec_vid_mpy_imm_v16i8(<16 x i8>* %x) { define void @buildvec_dominant0_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: buildvec_dominant0_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vmv.s.x v25, zero ; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vmv.s.x v25, zero ; CHECK-NEXT: vmv.v.i v26, 8 ; CHECK-NEXT: vsetivli a1, 4, e16,m1,tu,mu ; CHECK-NEXT: vslideup.vi v26, v25, 3 @@ -117,9 +116,7 @@ define void @buildvec_dominant2_v2i8(<2 x i8>* %x) 
{ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu ; CHECK-NEXT: vmv.v.i v25, -1 -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.s.x v25, zero -; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu ; CHECK-NEXT: vse8.v v25, (a0) ; CHECK-NEXT: ret store <2 x i8> , <2 x i8>* %x @@ -148,9 +145,7 @@ define void @buildvec_dominant0_v2i32(<2 x i64>* %x) { ; RV64-NEXT: addi a1, a1, -455 ; RV64-NEXT: slli a1, a1, 13 ; RV64-NEXT: addi a1, a1, -910 -; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.s.x v25, a1 -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV64-NEXT: vse64.v v25, (a0) ; RV64-NEXT: ret store <2 x i64> , <2 x i64>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 33f2e0d3998e..84784ee82c1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -872,28 +872,24 @@ define void @mulhu_v8i16(<8 x i16>* %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vmv1r.v v27, v26 +; CHECK-NEXT: vmv.s.x v27, a1 ; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: vsetvli a2, zero, e16,m1,ta,mu -; CHECK-NEXT: vmv.s.x v26, a1 -; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu -; CHECK-NEXT: vmv.v.i v27, 0 +; CHECK-NEXT: vmv.s.x v28, a1 ; CHECK-NEXT: vsetivli a1, 7, e16,m1,tu,mu -; CHECK-NEXT: vmv1r.v v28, v27 -; CHECK-NEXT: vslideup.vi v28, v26, 6 +; CHECK-NEXT: vslideup.vi v26, v28, 6 ; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; CHECK-NEXT: lui a1, %hi(.LCPI53_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_0) -; CHECK-NEXT: vle16.v v26, (a1) -; CHECK-NEXT: vsrl.vv v28, v25, v28 -; CHECK-NEXT: vmulhu.vv v26, v28, v26 -; CHECK-NEXT: vsub.vv v25, v25, v26 -; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: vsetvli a2, zero, e16,m1,ta,mu -; CHECK-NEXT: vmv.s.x v27, a1 -; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsrl.vv v26, v25, v26 +; CHECK-NEXT: vmulhu.vv v26, v26, v28 ; CHECK-NEXT: lui a1, %hi(.LCPI53_1) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_1) ; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vsub.vv v25, v25, v26 ; CHECK-NEXT: vmulhu.vv v25, v25, v27 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: vsrl.vv v25, v25, v28 @@ -910,25 +906,21 @@ define void @mulhu_v4i32(<4 x i32>* %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmv.s.x v26, a1 +; CHECK-NEXT: vmv.v.i v27, 0 +; CHECK-NEXT: vsetivli a1, 3, e32,m1,tu,mu +; CHECK-NEXT: vslideup.vi v27, v26, 2 ; CHECK-NEXT: lui a1, %hi(.LCPI54_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI54_0) +; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v26, (a1) ; CHECK-NEXT: vmulhu.vv v26, v25, v26 ; CHECK-NEXT: vsub.vv v25, v25, v26 -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: vsetvli a2, zero, e32,m1,ta,mu -; CHECK-NEXT: vmv.s.x v27, a1 -; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; CHECK-NEXT: vmv.v.i v28, 0 -; CHECK-NEXT: vsetivli a1, 3, e32,m1,tu,mu -; CHECK-NEXT: vslideup.vi v28, v27, 2 -; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; CHECK-NEXT: vmulhu.vv v25, v25, v28 +; CHECK-NEXT: vmulhu.vv v25, v25, v27 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: vsetvli a2, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.s.x v26, a1 -; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; CHECK-NEXT: vmv.v.i v27, 2 ; CHECK-NEXT: vsetivli a1, 4, 
e32,m1,tu,mu ; CHECK-NEXT: vslideup.vi v27, v26, 3 @@ -966,6 +958,9 @@ define void @mulhu_v2i64(<2 x i64>* %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.v.i v26, 2 +; RV64-NEXT: addi a1, zero, 1 +; RV64-NEXT: vmv.s.x v26, a1 ; RV64-NEXT: lui a1, 1035469 ; RV64-NEXT: addiw a1, a1, -819 ; RV64-NEXT: slli a1, a1, 12 @@ -974,7 +969,7 @@ define void @mulhu_v2i64(<2 x i64>* %x) { ; RV64-NEXT: addi a1, a1, -819 ; RV64-NEXT: slli a1, a1, 12 ; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: vmv.v.x v26, a1 +; RV64-NEXT: vmv.v.x v27, a1 ; RV64-NEXT: lui a1, 1026731 ; RV64-NEXT: addiw a1, a1, -1365 ; RV64-NEXT: slli a1, a1, 12 @@ -983,15 +978,8 @@ define void @mulhu_v2i64(<2 x i64>* %x) { ; RV64-NEXT: addi a1, a1, -1365 ; RV64-NEXT: slli a1, a1, 12 ; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; RV64-NEXT: vmv.s.x v26, a1 -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vmulhu.vv v25, v25, v26 -; RV64-NEXT: vmv.v.i v26, 2 -; RV64-NEXT: addi a1, zero, 1 -; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; RV64-NEXT: vmv.s.x v26, a1 -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV64-NEXT: vmv.s.x v27, a1 +; RV64-NEXT: vmulhu.vv v25, v25, v27 ; RV64-NEXT: vsrl.vv v25, v25, v26 ; RV64-NEXT: vse64.v v25, (a0) ; RV64-NEXT: ret @@ -1092,7 +1080,6 @@ define void @mulhs_v2i64(<2 x i64>* %x) { ; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu ; RV32-NEXT: vmv.v.x v27, a2 ; RV32-NEXT: addi a1, a1, 1366 -; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu ; RV32-NEXT: vmv.s.x v27, a1 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV32-NEXT: vmulh.vv v25, v25, v27 @@ -1104,9 +1091,8 @@ define void @mulhs_v2i64(<2 x i64>* %x) { ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV32-NEXT: vsrl.vv v26, v25, v26 ; RV32-NEXT: addi a1, zero, 1 -; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu +; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; RV32-NEXT: vmv.s.x v27, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; RV32-NEXT: vmv.v.i v28, 0 ; RV32-NEXT: vsetivli a1, 3, e32,m1,tu,mu ; RV32-NEXT: vslideup.vi v28, v27, 2 @@ -1121,9 +1107,7 @@ define void @mulhs_v2i64(<2 x i64>* %x) { ; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV64-NEXT: vle64.v v25, (a0) ; RV64-NEXT: vmv.v.i v26, -1 -; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.s.x v26, zero -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV64-NEXT: vmul.vv v26, v25, v26 ; RV64-NEXT: lui a1, 21845 ; RV64-NEXT: addiw a1, a1, 1365 @@ -1135,9 +1119,7 @@ define void @mulhs_v2i64(<2 x i64>* %x) { ; RV64-NEXT: addi a2, a1, 1365 ; RV64-NEXT: vmv.v.x v27, a2 ; RV64-NEXT: addi a1, a1, 1366 -; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.s.x v27, a1 -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV64-NEXT: vmulh.vv v25, v25, v27 ; RV64-NEXT: vadd.vv v25, v25, v26 ; RV64-NEXT: addi a1, zero, 63 @@ -3983,40 +3965,36 @@ define void @mulhu_v8i32(<8 x i32>* %x) { ; LMULMAX1-RV32-LABEL: mulhu_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle32.v v25, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v25, (a1) +; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX1-RV32-NEXT: lui a2, 524288 +; LMULMAX1-RV32-NEXT: vmv.s.x v27, a2 +; LMULMAX1-RV32-NEXT: vmv.v.i v28, 0 +; LMULMAX1-RV32-NEXT: vsetivli a2, 3, e32,m1,tu,mu +; LMULMAX1-RV32-NEXT: vslideup.vi v28, v27, 2 ; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI131_0) ; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI131_0) -; LMULMAX1-RV32-NEXT: vle32.v v26, (a2) -; LMULMAX1-RV32-NEXT: 
vle32.v v27, (a0) -; LMULMAX1-RV32-NEXT: vmulhu.vv v28, v25, v26 -; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: lui a2, 524288 -; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v29, a2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v30, 0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 3, e32,m1,tu,mu -; LMULMAX1-RV32-NEXT: vslideup.vi v30, v29, 2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v30 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle32.v v27, (a2) +; LMULMAX1-RV32-NEXT: vmulhu.vv v29, v26, v27 +; LMULMAX1-RV32-NEXT: vsub.vv v26, v26, v29 +; LMULMAX1-RV32-NEXT: vmulhu.vv v26, v26, v28 +; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v29 ; LMULMAX1-RV32-NEXT: addi a2, zero, 1 -; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v28, a2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v29, 2 +; LMULMAX1-RV32-NEXT: vmv.s.x v29, a2 +; LMULMAX1-RV32-NEXT: vmv.v.i v30, 2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,tu,mu -; LMULMAX1-RV32-NEXT: vslideup.vi v29, v28, 3 +; LMULMAX1-RV32-NEXT: vslideup.vi v30, v29, 3 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v29 -; LMULMAX1-RV32-NEXT: vmulhu.vv v26, v27, v26 -; LMULMAX1-RV32-NEXT: vsub.vv v27, v27, v26 -; LMULMAX1-RV32-NEXT: vmulhu.vv v27, v27, v30 -; LMULMAX1-RV32-NEXT: vadd.vv v26, v27, v26 -; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v29 -; LMULMAX1-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v25, (a1) +; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v30 +; LMULMAX1-RV32-NEXT: vmulhu.vv v27, v25, v27 +; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v30 +; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v26, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhu_v8i32: @@ -4052,9 +4030,8 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX2-RV32-NEXT: vmulhu.vv v28, v26, v28 ; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 524288 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.s.x v30, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 6, e32,m2,tu,mu ; LMULMAX2-RV32-NEXT: vslideup.vi v8, v30, 5 @@ -4074,26 +4051,24 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu ; LMULMAX2-RV64-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI132_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI132_0) -; LMULMAX2-RV64-NEXT: vle64.v v28, (a1) -; LMULMAX2-RV64-NEXT: vmulhu.vv v28, v26, v28 -; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v28 ; LMULMAX2-RV64-NEXT: addi a1, zero, -1 ; LMULMAX2-RV64-NEXT: slli a1, a1, 63 -; LMULMAX2-RV64-NEXT: vsetvli a2, zero, e64,m2,ta,mu -; LMULMAX2-RV64-NEXT: vmv.s.x v30, a1 -; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV64-NEXT: vmv.v.i v8, 0 +; LMULMAX2-RV64-NEXT: vmv.s.x v28, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v30, 0 ; LMULMAX2-RV64-NEXT: vsetivli a1, 3, e64,m2,tu,mu -; LMULMAX2-RV64-NEXT: vslideup.vi v8, v30, 2 -; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; 
LMULMAX2-RV64-NEXT: vslideup.vi v30, v28, 2 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI132_0) +; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI132_0) +; LMULMAX2-RV64-NEXT: vsetivli a2, 4, e64,m2,ta,mu +; LMULMAX2-RV64-NEXT: vle64.v v28, (a1) +; LMULMAX2-RV64-NEXT: vmulhu.vv v28, v26, v28 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI132_1) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI132_1) -; LMULMAX2-RV64-NEXT: vle64.v v30, (a1) -; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v8 +; LMULMAX2-RV64-NEXT: vle64.v v8, (a1) +; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v30 ; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28 -; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v8 ; LMULMAX2-RV64-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -4121,11 +4096,15 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; ; LMULMAX1-RV64-LABEL: mulhu_v4i64: ; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: addi a2, zero, 2 -; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: addi a1, zero, 2 +; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v26, (a1) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV64-NEXT: vmv.v.i v27, 0 +; LMULMAX1-RV64-NEXT: addi a3, zero, -1 +; LMULMAX1-RV64-NEXT: slli a3, a3, 63 +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a3 ; LMULMAX1-RV64-NEXT: lui a3, 1044935 ; LMULMAX1-RV64-NEXT: addiw a3, a3, 455 ; LMULMAX1-RV64-NEXT: slli a3, a3, 12 @@ -4134,7 +4113,7 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: addi a3, a3, 455 ; LMULMAX1-RV64-NEXT: slli a3, a3, 13 ; LMULMAX1-RV64-NEXT: addi a3, a3, 911 -; LMULMAX1-RV64-NEXT: vmv.v.x v27, a3 +; LMULMAX1-RV64-NEXT: vmv.v.x v28, a3 ; LMULMAX1-RV64-NEXT: lui a3, 4681 ; LMULMAX1-RV64-NEXT: addiw a3, a3, 585 ; LMULMAX1-RV64-NEXT: slli a3, a3, 12 @@ -4143,53 +4122,39 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: addi a3, a3, 585 ; LMULMAX1-RV64-NEXT: slli a3, a3, 13 ; LMULMAX1-RV64-NEXT: addi a3, a3, 1171 -; LMULMAX1-RV64-NEXT: vsetvli a4, zero, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a3 -; LMULMAX1-RV64-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmulhu.vv v27, v26, v27 -; LMULMAX1-RV64-NEXT: vsub.vv v26, v26, v27 -; LMULMAX1-RV64-NEXT: vmv.v.i v28, 0 -; LMULMAX1-RV64-NEXT: addi a3, zero, -1 -; LMULMAX1-RV64-NEXT: slli a3, a3, 63 -; LMULMAX1-RV64-NEXT: vsetvli a4, zero, e64,m1,ta,mu ; LMULMAX1-RV64-NEXT: vmv.s.x v28, a3 -; LMULMAX1-RV64-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmulhu.vv v26, v26, v28 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v27 +; LMULMAX1-RV64-NEXT: vmulhu.vv v28, v26, v28 +; LMULMAX1-RV64-NEXT: vsub.vv v26, v26, v28 +; LMULMAX1-RV64-NEXT: vmulhu.vv v26, v26, v27 +; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 ; LMULMAX1-RV64-NEXT: vmv.v.i v27, 3 -; LMULMAX1-RV64-NEXT: vsetvli a3, zero, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1 ; LMULMAX1-RV64-NEXT: vsrl.vv v26, v26, v27 -; LMULMAX1-RV64-NEXT: lui a2, 1035469 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: vmv.v.x v27, a2 -; 
LMULMAX1-RV64-NEXT: lui a2, 1026731 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 -; LMULMAX1-RV64-NEXT: vsetvli a3, zero, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v27 ; LMULMAX1-RV64-NEXT: vmv.v.i v27, 2 -; LMULMAX1-RV64-NEXT: addi a2, zero, 1 -; LMULMAX1-RV64-NEXT: vsetvli a3, zero, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: addi a1, zero, 1 +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1 +; LMULMAX1-RV64-NEXT: lui a1, 1035469 +; LMULMAX1-RV64-NEXT: addiw a1, a1, -819 +; LMULMAX1-RV64-NEXT: slli a1, a1, 12 +; LMULMAX1-RV64-NEXT: addi a1, a1, -819 +; LMULMAX1-RV64-NEXT: slli a1, a1, 12 +; LMULMAX1-RV64-NEXT: addi a1, a1, -819 +; LMULMAX1-RV64-NEXT: slli a1, a1, 12 +; LMULMAX1-RV64-NEXT: addi a1, a1, -819 +; LMULMAX1-RV64-NEXT: vmv.v.x v28, a1 +; LMULMAX1-RV64-NEXT: lui a1, 1026731 +; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365 +; LMULMAX1-RV64-NEXT: slli a1, a1, 12 +; LMULMAX1-RV64-NEXT: addi a1, a1, -1365 +; LMULMAX1-RV64-NEXT: slli a1, a1, 12 +; LMULMAX1-RV64-NEXT: addi a1, a1, -1365 +; LMULMAX1-RV64-NEXT: slli a1, a1, 12 +; LMULMAX1-RV64-NEXT: addi a1, a1, -1365 +; LMULMAX1-RV64-NEXT: vmv.s.x v28, a1 +; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v28 ; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v27 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v26, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v26, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = udiv <4 x i64> %a, @@ -4416,14 +4381,12 @@ define void @mulhs_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-LABEL: mulhs_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v26, (a1) +; LMULMAX1-RV64-NEXT: vle64.v v25, (a1) +; LMULMAX1-RV64-NEXT: vle64.v v26, (a0) ; LMULMAX1-RV64-NEXT: vmv.v.i v27, -1 -; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu ; LMULMAX1-RV64-NEXT: vmv.s.x v27, zero -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmul.vv v28, v26, v27 +; LMULMAX1-RV64-NEXT: vmul.vv v28, v25, v27 ; LMULMAX1-RV64-NEXT: lui a2, 21845 ; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 @@ -4434,24 +4397,22 @@ define void @mulhs_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: addi a3, a2, 1365 ; LMULMAX1-RV64-NEXT: vmv.v.x v29, a3 ; LMULMAX1-RV64-NEXT: addi a2, a2, 1366 -; LMULMAX1-RV64-NEXT: vsetvli a3, zero, e64,m1,ta,mu ; LMULMAX1-RV64-NEXT: vmv.s.x v29, a2 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmulh.vv v26, v26, v29 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 -; LMULMAX1-RV64-NEXT: addi a2, zero, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v28, v26, a2 -; LMULMAX1-RV64-NEXT: vid.v v30 -; LMULMAX1-RV64-NEXT: vsra.vv v26, v26, v30 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 -; LMULMAX1-RV64-NEXT: vmul.vv v27, v25, v27 ; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v29 -; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27 -; LMULMAX1-RV64-NEXT: vsrl.vx v27, v25, a2 +; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: addi a2, zero, 63 +; LMULMAX1-RV64-NEXT: vsrl.vx v28, v25, a2 +; 
LMULMAX1-RV64-NEXT: vid.v v30 ; LMULMAX1-RV64-NEXT: vsra.vv v25, v25, v30 -; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27 -; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v26, (a1) +; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vmul.vv v27, v26, v27 +; LMULMAX1-RV64-NEXT: vmulh.vv v26, v26, v29 +; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v27 +; LMULMAX1-RV64-NEXT: vsrl.vx v27, v26, a2 +; LMULMAX1-RV64-NEXT: vsra.vv v26, v26, v30 +; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v27 +; LMULMAX1-RV64-NEXT: vse64.v v26, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v25, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = sdiv <4 x i64> %a,