From e68b0d587581e68e04c155f5f319c22348f1e2b5 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 13 Oct 2022 08:52:58 -0700
Subject: [PATCH] [RISCV] Match (select C, -1, X)->(or -C, X) during lowerSelect

Same with (select C, X, -1), (select C, 0, X), and (select C, X, 0).

There's a DAGCombine after we turn the select into select_cc, but that
may introduce a setcc that didn't previously exist. We could add more
DAGCombines to remove the extra setcc, but this seemed lower effort.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D135833
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  24 +
 llvm/test/CodeGen/RISCV/double-convert.ll     | 243 +++----
 .../CodeGen/RISCV/double-round-conv-sat.ll    | 155 ++---
 llvm/test/CodeGen/RISCV/float-convert.ll      | 213 +++---
 .../CodeGen/RISCV/float-round-conv-sat.ll     | 155 ++---
 llvm/test/CodeGen/RISCV/forced-atomics.ll     |  12 +-
 llvm/test/CodeGen/RISCV/fpclamptosat.ll       | 352 ++++------
 llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll   | 655 +++++++++---------
 llvm/test/CodeGen/RISCV/half-convert.ll       | 251 +++----
 .../test/CodeGen/RISCV/half-round-conv-sat.ll | 185 +++--
 llvm/test/CodeGen/RISCV/min-max.ll            |  12 +-
 llvm/test/CodeGen/RISCV/rv64zbb.ll            |   8 +-
 .../CodeGen/RISCV/selectcc-to-shiftand.ll     |   3 +-
 llvm/test/CodeGen/RISCV/uadd_sat.ll           |   6 +-
 llvm/test/CodeGen/RISCV/uadd_sat_plus.ll      |   6 +-
 llvm/test/CodeGen/RISCV/usub_sat.ll           |   2 -
 llvm/test/CodeGen/RISCV/usub_sat_plus.ll      |   2 -
 llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll   |  44 +-
 18 files changed, 1057 insertions(+), 1271 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 46c9fd36fb53..874fe95c0a95 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4222,6 +4222,30 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
   }

+  // (select c, -1, y) -> -c | y
+  if (isAllOnesConstant(TrueV)) {
+    SDValue Neg = DAG.getNegative(CondV, DL, VT);
+    return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+  }
+  // (select c, y, -1) -> (c-1) | y
+  if (isAllOnesConstant(FalseV)) {
+    SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+  }
+
+  // (select c, 0, y) -> (c-1) & y
+  if (isNullConstant(TrueV)) {
+    SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+  }
+  // (select c, y, 0) -> -c & y
+  if (isNullConstant(FalseV)) {
+    SDValue Neg = DAG.getNegative(CondV, DL, VT);
+    return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+  }
+
   // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
   // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
   // advantage of the integer compare+branch instructions.
i.e.: diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index dce61afedb1b..825bbe0b7b94 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -309,29 +309,27 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 270080 ; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: lui a2, 1048064 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __fixunsdfsi@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: neg s2, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: bltz a0, .LBB6_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: .LBB6_2: # %start -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: and a0, s3, a0 +; RV32I-NEXT: or a0, s2, a0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -570,11 +568,9 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a1, a3, a1 -; RV32IFD-NEXT: seqz a4, s0 -; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: neg a4, s0 ; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -594,25 +590,24 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; ; RV32I-LABEL: fcvt_l_d_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -48 -; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 278016 -; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: li a2, -1 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s7, a0 +; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: lui a3, 802304 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 @@ -622,43 
+617,41 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfdi@plt -; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: mv s4, a1 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: lui a0, 524288 +; RV32I-NEXT: lui s6, 524288 +; RV32I-NEXT: bltz s5, .LBB12_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: mv s6, a1 +; RV32I-NEXT: .LBB12_2: # %start +; RV32I-NEXT: blez s4, .LBB12_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: addi s6, a0, -1 +; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: addi s2, a0, -1 -; RV32I-NEXT: bgtz s7, .LBB12_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: slti a0, s5, 0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: and s2, s2, a0 -; RV32I-NEXT: .LBB12_2: # %start -; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a3, s3 -; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: and s4, a0, s6 ; RV32I-NEXT: lui a3, 802304 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: bltz a0, .LBB12_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s5, s4 -; RV32I-NEXT: .LBB12_4: # %start -; RV32I-NEXT: blez s3, .LBB12_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: addi s5, a1, -1 -; RV32I-NEXT: .LBB12_6: # %start +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and s3, a0, s3 +; RV32I-NEXT: li a2, -1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: call __gtdf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or s2, a0, s3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 @@ -666,18 +659,17 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a1, a0, s5 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_l_d_sat: @@ -778,15 +770,13 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi s0, a0, -1 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: call __fixunsdfdi@plt ; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0) ; RV32IFD-NEXT: fld ft0, 
%lo(.LCPI14_0)(a2) ; RV32IFD-NEXT: and a0, s0, a0 ; RV32IFD-NEXT: flt.d a2, ft0, fs0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a1, s0, a1 ; RV32IFD-NEXT: or a1, a2, a1 @@ -816,47 +806,45 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 278272 -; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: neg s4, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __gedf2@plt +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s5, a0, -1 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixunsdfdi@plt -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s4, a1 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: bltz a0, .LBB14_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s1, s1, s5 -; RV32I-NEXT: .LBB14_2: # %start +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: and a0, s5, a0 +; RV32I-NEXT: or s4, s4, a0 ; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s3, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: neg s2, a0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: bltz a0, .LBB14_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: or s3, s3, s4 -; RV32I-NEXT: .LBB14_4: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: or a1, s2, a0 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -873,30 +861,25 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __gedf2@plt +; RV64I-NEXT: slti a0, a0, 0 +; RV64I-NEXT: addi s1, a0, -1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __fixunsdfdi@plt +; RV64I-NEXT: and s1, s1, a0 ; RV64I-NEXT: li a0, 1087 ; RV64I-NEXT: slli a0, a0, 52 ; RV64I-NEXT: addi a1, a0, -1 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt ; RV64I-NEXT: sgtz a0, a0 -; RV64I-NEXT: neg s0, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: call __fixunsdfdi@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: bltz a0, .LBB14_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: or s0, s0, s1 -; RV64I-NEXT: .LBB14_2: # %start -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: or a0, a0, s1 ; RV64I-NEXT: ld 
ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: @@ -1916,29 +1899,27 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 270080 ; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: lui a2, 1048064 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __fixunsdfsi@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: neg s2, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: bltz a0, .LBB33_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: .LBB33_2: # %start -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: and a0, s3, a0 +; RV32I-NEXT: or a0, s2, a0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll index d60917aa0430..cd3a7ace04fe 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -47,11 +47,9 @@ define i64 @test_floor_si64(double %x) nounwind { ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a1, a3, a1 -; RV32IFD-NEXT: seqz a4, s0 -; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: neg a4, s0 ; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -93,26 +91,23 @@ define i64 @test_floor_ui64(double %x) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call floor@plt -; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI3_0)(a0) +; RV32IFD-NEXT: flt.d a0, ft0, fa0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi s0, a0, -1 +; RV32IFD-NEXT: neg s1, a0 ; RV32IFD-NEXT: call __fixunsdfdi@plt -; RV32IFD-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IFD-NEXT: fld ft0, %lo(.LCPI3_0)(a2) -; RV32IFD-NEXT: and a0, s0, a0 -; RV32IFD-NEXT: flt.d a2, ft0, fs0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a2, a1 +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 ; RV32IFD-NEXT: lw ra, 
12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -172,11 +167,9 @@ define i64 @test_ceil_si64(double %x) nounwind { ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a1, a3, a1 -; RV32IFD-NEXT: seqz a4, s0 -; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: neg a4, s0 ; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -218,26 +211,23 @@ define i64 @test_ceil_ui64(double %x) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call ceil@plt -; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; RV32IFD-NEXT: flt.d a0, ft0, fa0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi s0, a0, -1 +; RV32IFD-NEXT: neg s1, a0 ; RV32IFD-NEXT: call __fixunsdfdi@plt -; RV32IFD-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IFD-NEXT: fld ft0, %lo(.LCPI7_0)(a2) -; RV32IFD-NEXT: and a0, s0, a0 -; RV32IFD-NEXT: flt.d a2, ft0, fs0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a2, a1 +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -297,11 +287,9 @@ define i64 @test_trunc_si64(double %x) nounwind { ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a1, a3, a1 -; RV32IFD-NEXT: seqz a4, s0 -; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: neg a4, s0 ; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -343,26 +331,23 @@ define i64 @test_trunc_ui64(double %x) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call trunc@plt -; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; RV32IFD-NEXT: flt.d a0, ft0, fa0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi s0, a0, -1 +; RV32IFD-NEXT: neg s1, a0 ; RV32IFD-NEXT: call __fixunsdfdi@plt -; RV32IFD-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_0)(a2) -; RV32IFD-NEXT: and a0, s0, a0 -; RV32IFD-NEXT: flt.d a2, ft0, fs0 -; RV32IFD-NEXT: seqz a2, a2 -; 
RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a2, a1 +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -422,11 +407,9 @@ define i64 @test_round_si64(double %x) nounwind { ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a1, a3, a1 -; RV32IFD-NEXT: seqz a4, s0 -; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: neg a4, s0 ; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -468,26 +451,23 @@ define i64 @test_round_ui64(double %x) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call round@plt -; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI15_0)(a0) +; RV32IFD-NEXT: flt.d a0, ft0, fa0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi s0, a0, -1 +; RV32IFD-NEXT: neg s1, a0 ; RV32IFD-NEXT: call __fixunsdfdi@plt -; RV32IFD-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IFD-NEXT: fld ft0, %lo(.LCPI15_0)(a2) -; RV32IFD-NEXT: and a0, s0, a0 -; RV32IFD-NEXT: flt.d a2, ft0, fs0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a2, a1 +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -547,11 +527,9 @@ define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a1, a3, a1 -; RV32IFD-NEXT: seqz a4, s0 -; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: neg a4, s0 ; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a0, a2, a0 ; RV32IFD-NEXT: and a0, a3, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -593,26 +571,23 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call roundeven@plt -; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI19_0)(a0) +; RV32IFD-NEXT: flt.d a0, ft0, fa0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a0, ft0, fa0 -; RV32IFD-NEXT: seqz a0, a0 
-; RV32IFD-NEXT: addi s0, a0, -1 +; RV32IFD-NEXT: neg s1, a0 ; RV32IFD-NEXT: call __fixunsdfdi@plt -; RV32IFD-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IFD-NEXT: fld ft0, %lo(.LCPI19_0)(a2) -; RV32IFD-NEXT: and a0, s0, a0 -; RV32IFD-NEXT: flt.d a2, ft0, fs0 -; RV32IFD-NEXT: seqz a2, a2 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a0, a2, a0 -; RV32IFD-NEXT: and a1, s0, a1 -; RV32IFD-NEXT: or a1, a2, a1 +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index 2156cd8557ea..2b589c5229b9 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -236,29 +236,24 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: lui a0, 325632 -; RV32I-NEXT: addi a1, a0, -1 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB4_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: .LBB4_2: # %start +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s1, a0, -1 ; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: and s1, s1, a0 +; RV32I-NEXT: lui a0, 325632 +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or a0, a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -552,11 +547,9 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: seqz a4, s0 -; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -583,62 +576,59 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 913408 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfdi@plt -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui s4, 524288 +; RV32I-NEXT: lui s3, 524288 +; RV32I-NEXT: bltz s2, .LBB12_2 +; RV32I-NEXT: # %bb.1: # %start +; 
RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: .LBB12_2: # %start +; RV32I-NEXT: lui a0, 389120 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: blez a0, .LBB12_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: addi s3, s4, -1 +; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt ; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: addi s1, a0, -1 -; RV32I-NEXT: lui a0, 389120 -; RV32I-NEXT: addi s3, a0, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bgtz a0, .LBB12_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: slti a0, s4, 0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s5 -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: .LBB12_2: # %start +; RV32I-NEXT: and s3, a0, s3 ; RV32I-NEXT: lui a1, 913408 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: lui s4, 524288 -; RV32I-NEXT: bltz a0, .LBB12_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s4, s2 -; RV32I-NEXT: .LBB12_4: # %start +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and s1, a0, s1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: blez a0, .LBB12_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: addi s4, s5, -1 -; RV32I-NEXT: .LBB12_6: # %start +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or s1, a0, s1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -737,15 +727,13 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi s0, a0, -1 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: lui a2, %hi(.LCPI14_0) ; RV32IF-NEXT: flw ft0, %lo(.LCPI14_0)(a2) ; RV32IF-NEXT: and a0, s0, a0 ; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a1, s0, a1 ; RV32IF-NEXT: or a1, a2, a1 @@ -772,46 +760,41 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __gesf2@plt +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixunssfdi@plt +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, s2, a0 ; RV32I-NEXT: lui a0, 391168 ; RV32I-NEXT: addi s2, a0, -1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, 
s1 -; RV32I-NEXT: call __fixunssfdi@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB14_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s4 -; RV32I-NEXT: .LBB14_2: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s2, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB14_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: or s2, s2, s3 -; RV32I-NEXT: .LBB14_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or s3, a0, s3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __gesf2@plt +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and s1, a0, s1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or a1, a0, s1 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -821,29 +804,24 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: lui a0, 391168 -; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: sgtz a0, a0 -; RV64I-NEXT: neg s0, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: bltz a0, .LBB14_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: or s0, s0, s1 -; RV64I-NEXT: .LBB14_2: # %start +; RV64I-NEXT: slti a0, a0, 0 +; RV64I-NEXT: addi s1, a0, -1 ; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __fixunssfdi@plt +; RV64I-NEXT: and s1, s1, a0 +; RV64I-NEXT: lui a0, 391168 +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __gtsf2@plt +; RV64I-NEXT: sgtz a0, a0 +; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: or a0, a0, s1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: @@ -1725,29 +1703,24 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: lui a0, 325632 -; RV32I-NEXT: addi a1, a0, -1 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li 
a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB31_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: .LBB31_2: # %start +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s1, a0, -1 ; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: and s1, s1, a0 +; RV32I-NEXT: lui a0, 325632 +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or a0, a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll index df0d625e4e91..224eef9121de 100644 --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -47,11 +47,9 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: seqz a4, s0 -; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -93,26 +91,23 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: call floorf@plt -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi s0, a0, -1 +; RV32IF-NEXT: neg s1, a0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -172,11 +167,9 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: seqz a4, s0 -; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -218,26 +211,23 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded 
Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: call ceilf@plt -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi s0, a0, -1 +; RV32IF-NEXT: neg s1, a0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -297,11 +287,9 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: seqz a4, s0 -; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -343,26 +331,23 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: call truncf@plt -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi s0, a0, -1 +; RV32IF-NEXT: neg s1, a0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -422,11 +407,9 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: seqz a4, s0 -; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -468,26 +451,23 @@ define i64 
@test_round_ui64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: call roundf@plt -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI15_0)(a0) +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi s0, a0, -1 +; RV32IF-NEXT: neg s1, a0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI15_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -547,11 +527,9 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: seqz a4, s0 -; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: neg a4, s0 ; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a0, a2, a0 ; RV32IF-NEXT: and a0, a3, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -593,26 +571,23 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: call roundevenf@plt -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IF-NEXT: flt.s a0, ft0, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x ft0, zero ; RV32IF-NEXT: fle.s a0, ft0, fa0 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi s0, a0, -1 +; RV32IF-NEXT: neg s1, a0 ; RV32IF-NEXT: call __fixunssfdi@plt -; RV32IF-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, ft0, fs0 -; RV32IF-NEXT: seqz a2, a2 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index cd5f95691492..edca10087e13 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -2504,8 +2504,7 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind { ; RV32-NEXT: j 
.LBB49_2 ; RV32-NEXT: .LBB49_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) @@ -2600,8 +2599,7 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind { ; RV32-NEXT: j .LBB50_2 ; RV32-NEXT: .LBB50_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) @@ -2698,8 +2696,7 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: j .LBB51_2 ; RV32-NEXT: .LBB51_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) @@ -2794,8 +2791,7 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind { ; RV32-NEXT: j .LBB52_2 ; RV32-NEXT: .LBB52_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a3, a0, a1 ; RV32-NEXT: sw a4, 0(sp) ; RV32-NEXT: sw a1, 4(sp) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index e1eef5a5e13b..a2718295ae6a 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -27,8 +27,7 @@ define i32 @stest_f64i32(double %x) { ; RV32IF-NEXT: .LBB0_2: ; RV32IF-NEXT: sltu a4, a0, a3 ; RV32IF-NEXT: .LBB0_3: # %entry -; RV32IF-NEXT: seqz a5, a4 -; RV32IF-NEXT: addi a5, a5, -1 +; RV32IF-NEXT: neg a5, a4 ; RV32IF-NEXT: and a1, a5, a1 ; RV32IF-NEXT: bnez a4, .LBB0_5 ; RV32IF-NEXT: # %bb.4: # %entry @@ -184,7 +183,6 @@ define i32 @ustest_f64i32(double %x) { ; RV32IF-NEXT: .LBB2_2: ; RV32IF-NEXT: sltiu a2, a0, -1 ; RV32IF-NEXT: .LBB2_3: # %entry -; RV32IF-NEXT: snez a2, a2 ; RV32IF-NEXT: addi a3, a2, -1 ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: and a1, a2, a1 @@ -196,8 +194,7 @@ define i32 @ustest_f64i32(double %x) { ; RV32IF-NEXT: .LBB2_5: ; RV32IF-NEXT: snez a1, a0 ; RV32IF-NEXT: .LBB2_6: # %entry -; RV32IF-NEXT: seqz a1, a1 -; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: neg a1, a1 ; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -371,8 +368,7 @@ define i32 @stest_f16i32(half %x) { ; RV32-NEXT: .LBB6_2: ; RV32-NEXT: sltu a4, a0, a3 ; RV32-NEXT: .LBB6_3: # %entry -; RV32-NEXT: seqz a5, a4 -; RV32-NEXT: addi a5, a5, -1 +; RV32-NEXT: neg a5, a4 ; RV32-NEXT: and a1, a5, a1 ; RV32-NEXT: bnez a4, .LBB6_5 ; RV32-NEXT: # %bb.4: # %entry @@ -490,7 +486,6 @@ define i32 @ustest_f16i32(half %x) { ; RV32-NEXT: .LBB8_2: ; RV32-NEXT: sltiu a2, a0, -1 ; RV32-NEXT: .LBB8_3: # %entry -; RV32-NEXT: snez a2, a2 ; RV32-NEXT: addi a3, a2, -1 ; RV32-NEXT: neg a2, a2 ; RV32-NEXT: and a1, a2, a1 @@ -502,8 +497,7 @@ define i32 @ustest_f16i32(half %x) { ; RV32-NEXT: .LBB8_5: ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB8_6: # %entry -; RV32-NEXT: seqz a1, a1 -; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 @@ -1082,9 +1076,8 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: .LBB18_3: # %entry ; RV32IF-NEXT: slti a6, a0, 0 ; RV32IF-NEXT: .LBB18_4: # %entry -; RV32IF-NEXT: seqz t0, a6 -; RV32IF-NEXT: addi a7, t0, -1 -; 
RV32IF-NEXT: neg t0, t0 +; RV32IF-NEXT: neg a7, a6 +; RV32IF-NEXT: addi t0, a6, -1 ; RV32IF-NEXT: bnez a6, .LBB18_6 ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 @@ -1110,8 +1103,7 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: # %bb.12: # %entry ; RV32IF-NEXT: lui a1, 524288 ; RV32IF-NEXT: .LBB18_13: # %entry -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: neg a0, a0 ; RV32IF-NEXT: and a0, a0, a4 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 @@ -1133,8 +1125,7 @@ define i64 @stest_f64i64(double %x) { ; RV64IF-NEXT: .LBB18_2: ; RV64IF-NEXT: sltu a4, a0, a3 ; RV64IF-NEXT: .LBB18_3: # %entry -; RV64IF-NEXT: seqz a5, a4 -; RV64IF-NEXT: addi a5, a5, -1 +; RV64IF-NEXT: neg a5, a4 ; RV64IF-NEXT: and a5, a5, a1 ; RV64IF-NEXT: bnez a4, .LBB18_5 ; RV64IF-NEXT: # %bb.4: # %entry @@ -1184,9 +1175,8 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB18_3: # %entry ; RV32IFD-NEXT: slti a6, a0, 0 ; RV32IFD-NEXT: .LBB18_4: # %entry -; RV32IFD-NEXT: seqz t0, a6 -; RV32IFD-NEXT: addi a7, t0, -1 -; RV32IFD-NEXT: neg t0, t0 +; RV32IFD-NEXT: neg a7, a6 +; RV32IFD-NEXT: addi t0, a6, -1 ; RV32IFD-NEXT: bnez a6, .LBB18_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 @@ -1212,8 +1202,7 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: # %bb.12: # %entry ; RV32IFD-NEXT: lui a1, 524288 ; RV32IFD-NEXT: .LBB18_13: # %entry -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi a0, a0, -1 +; RV32IFD-NEXT: neg a0, a0 ; RV32IFD-NEXT: and a0, a0, a4 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 @@ -1261,8 +1250,7 @@ define i64 @utest_f64i64(double %x) { ; RV32IF-NEXT: seqz a0, a0 ; RV32IF-NEXT: addi a0, a0, -1 ; RV32IF-NEXT: and a0, a0, a4 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi a1, a0, -1 +; RV32IF-NEXT: neg a1, a0 ; RV32IF-NEXT: and a0, a1, a3 ; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1304,8 +1292,7 @@ define i64 @utest_f64i64(double %x) { ; RV32IFD-NEXT: seqz a0, a0 ; RV32IFD-NEXT: addi a0, a0, -1 ; RV32IFD-NEXT: and a0, a0, a4 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi a1, a0, -1 +; RV32IFD-NEXT: neg a1, a0 ; RV32IFD-NEXT: and a0, a1, a3 ; RV32IFD-NEXT: and a1, a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1339,45 +1326,43 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: .LBB20_2: ; RV32IF-NEXT: seqz a2, a0 ; RV32IF-NEXT: .LBB20_3: # %entry -; RV32IF-NEXT: lw a4, 12(sp) -; RV32IF-NEXT: xori a3, a0, 1 -; RV32IF-NEXT: or a3, a3, a1 -; RV32IF-NEXT: seqz a3, a3 -; RV32IF-NEXT: addi a3, a3, -1 -; RV32IF-NEXT: and a2, a3, a2 -; RV32IF-NEXT: seqz a3, a2 -; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: lw a3, 12(sp) +; RV32IF-NEXT: xori a4, a0, 1 +; RV32IF-NEXT: or a4, a4, a1 +; RV32IF-NEXT: seqz a4, a4 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a2, a4, a2 +; RV32IF-NEXT: neg a4, a2 ; RV32IF-NEXT: bnez a2, .LBB20_5 ; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: .LBB20_5: # %entry ; RV32IF-NEXT: lw a5, 8(sp) -; RV32IF-NEXT: and a2, a3, a1 -; RV32IF-NEXT: and a1, a3, a4 +; RV32IF-NEXT: and a2, a4, a1 +; RV32IF-NEXT: and a1, a4, a3 ; RV32IF-NEXT: beqz a2, .LBB20_8 ; RV32IF-NEXT: # %bb.6: # %entry -; RV32IF-NEXT: sgtz a4, a2 -; RV32IF-NEXT: and a3, a3, a5 +; RV32IF-NEXT: sgtz a3, a2 +; RV32IF-NEXT: and a4, a4, a5 ; RV32IF-NEXT: bnez a1, .LBB20_9 ; RV32IF-NEXT: .LBB20_7: -; RV32IF-NEXT: snez a5, a3 +; RV32IF-NEXT: snez a5, a4 ; RV32IF-NEXT: or a0, a0, a2 ; 
RV32IF-NEXT: bnez a0, .LBB20_10 ; RV32IF-NEXT: j .LBB20_11 ; RV32IF-NEXT: .LBB20_8: -; RV32IF-NEXT: snez a4, a0 -; RV32IF-NEXT: and a3, a3, a5 +; RV32IF-NEXT: snez a3, a0 +; RV32IF-NEXT: and a4, a4, a5 ; RV32IF-NEXT: beqz a1, .LBB20_7 ; RV32IF-NEXT: .LBB20_9: # %entry ; RV32IF-NEXT: snez a5, a1 ; RV32IF-NEXT: or a0, a0, a2 ; RV32IF-NEXT: beqz a0, .LBB20_11 ; RV32IF-NEXT: .LBB20_10: # %entry -; RV32IF-NEXT: mv a5, a4 +; RV32IF-NEXT: mv a5, a3 ; RV32IF-NEXT: .LBB20_11: # %entry -; RV32IF-NEXT: seqz a0, a5 -; RV32IF-NEXT: addi a2, a0, -1 -; RV32IF-NEXT: and a0, a2, a3 +; RV32IF-NEXT: neg a2, a5 +; RV32IF-NEXT: and a0, a2, a4 ; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 @@ -1390,23 +1375,21 @@ define i64 @ustest_f64i64(double %x) { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixdfti@plt -; RV64-NEXT: mv a2, a1 +; RV64-NEXT: slti a2, a1, 1 ; RV64-NEXT: blez a1, .LBB20_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a2, 1 +; RV64-NEXT: li a1, 1 ; RV64-NEXT: .LBB20_2: # %entry -; RV64-NEXT: sgtz a1, a1 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: beqz a2, .LBB20_4 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: beqz a1, .LBB20_4 ; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: sgtz a1, a1 ; RV64-NEXT: j .LBB20_5 ; RV64-NEXT: .LBB20_4: ; RV64-NEXT: snez a1, a0 ; RV64-NEXT: .LBB20_5: # %entry -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: neg a1, a1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 @@ -1429,45 +1412,43 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB20_2: ; RV32IFD-NEXT: seqz a2, a0 ; RV32IFD-NEXT: .LBB20_3: # %entry -; RV32IFD-NEXT: lw a4, 12(sp) -; RV32IFD-NEXT: xori a3, a0, 1 -; RV32IFD-NEXT: or a3, a3, a1 -; RV32IFD-NEXT: seqz a3, a3 -; RV32IFD-NEXT: addi a3, a3, -1 -; RV32IFD-NEXT: and a2, a3, a2 -; RV32IFD-NEXT: seqz a3, a2 -; RV32IFD-NEXT: addi a3, a3, -1 +; RV32IFD-NEXT: lw a3, 12(sp) +; RV32IFD-NEXT: xori a4, a0, 1 +; RV32IFD-NEXT: or a4, a4, a1 +; RV32IFD-NEXT: seqz a4, a4 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a2, a4, a2 +; RV32IFD-NEXT: neg a4, a2 ; RV32IFD-NEXT: bnez a2, .LBB20_5 ; RV32IFD-NEXT: # %bb.4: # %entry ; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: .LBB20_5: # %entry ; RV32IFD-NEXT: lw a5, 8(sp) -; RV32IFD-NEXT: and a2, a3, a1 -; RV32IFD-NEXT: and a1, a3, a4 +; RV32IFD-NEXT: and a2, a4, a1 +; RV32IFD-NEXT: and a1, a4, a3 ; RV32IFD-NEXT: beqz a2, .LBB20_8 ; RV32IFD-NEXT: # %bb.6: # %entry -; RV32IFD-NEXT: sgtz a4, a2 -; RV32IFD-NEXT: and a3, a3, a5 +; RV32IFD-NEXT: sgtz a3, a2 +; RV32IFD-NEXT: and a4, a4, a5 ; RV32IFD-NEXT: bnez a1, .LBB20_9 ; RV32IFD-NEXT: .LBB20_7: -; RV32IFD-NEXT: snez a5, a3 +; RV32IFD-NEXT: snez a5, a4 ; RV32IFD-NEXT: or a0, a0, a2 ; RV32IFD-NEXT: bnez a0, .LBB20_10 ; RV32IFD-NEXT: j .LBB20_11 ; RV32IFD-NEXT: .LBB20_8: -; RV32IFD-NEXT: snez a4, a0 -; RV32IFD-NEXT: and a3, a3, a5 +; RV32IFD-NEXT: snez a3, a0 +; RV32IFD-NEXT: and a4, a4, a5 ; RV32IFD-NEXT: beqz a1, .LBB20_7 ; RV32IFD-NEXT: .LBB20_9: # %entry ; RV32IFD-NEXT: snez a5, a1 ; RV32IFD-NEXT: or a0, a0, a2 ; RV32IFD-NEXT: beqz a0, .LBB20_11 ; RV32IFD-NEXT: .LBB20_10: # %entry -; RV32IFD-NEXT: mv a5, a4 +; RV32IFD-NEXT: mv a5, a3 ; RV32IFD-NEXT: .LBB20_11: # %entry -; RV32IFD-NEXT: seqz a0, a5 -; RV32IFD-NEXT: addi a2, a0, -1 -; RV32IFD-NEXT: and a0, a2, a3 +; RV32IFD-NEXT: neg a2, a5 +; 
RV32IFD-NEXT: and a0, a2, a4 ; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 @@ -1510,9 +1491,8 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: .LBB21_3: # %entry ; RV32-NEXT: slti a6, a0, 0 ; RV32-NEXT: .LBB21_4: # %entry -; RV32-NEXT: seqz t0, a6 -; RV32-NEXT: addi a7, t0, -1 -; RV32-NEXT: neg t0, t0 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 ; RV32-NEXT: bnez a6, .LBB21_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 @@ -1538,8 +1518,7 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: # %bb.12: # %entry ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB21_13: # %entry -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a0, a0, a4 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -1585,8 +1564,7 @@ define i64 @utest_f32i64(float %x) { ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: neg a1, a0 ; RV32-NEXT: and a0, a1, a3 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1632,45 +1610,43 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: .LBB23_2: ; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: .LBB23_3: # %entry -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: xori a3, a0, 1 -; RV32-NEXT: or a3, a3, a1 -; RV32-NEXT: seqz a3, a3 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: seqz a3, a2 -; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: lw a3, 12(sp) +; RV32-NEXT: xori a4, a0, 1 +; RV32-NEXT: or a4, a4, a1 +; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a2, a4, a2 +; RV32-NEXT: neg a4, a2 ; RV32-NEXT: bnez a2, .LBB23_5 ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB23_5: # %entry ; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: and a2, a3, a1 -; RV32-NEXT: and a1, a3, a4 +; RV32-NEXT: and a2, a4, a1 +; RV32-NEXT: and a1, a4, a3 ; RV32-NEXT: beqz a2, .LBB23_8 ; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: sgtz a4, a2 -; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: sgtz a3, a2 +; RV32-NEXT: and a4, a4, a5 ; RV32-NEXT: bnez a1, .LBB23_9 ; RV32-NEXT: .LBB23_7: -; RV32-NEXT: snez a5, a3 +; RV32-NEXT: snez a5, a4 ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: bnez a0, .LBB23_10 ; RV32-NEXT: j .LBB23_11 ; RV32-NEXT: .LBB23_8: -; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: snez a3, a0 +; RV32-NEXT: and a4, a4, a5 ; RV32-NEXT: beqz a1, .LBB23_7 ; RV32-NEXT: .LBB23_9: # %entry ; RV32-NEXT: snez a5, a1 ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: beqz a0, .LBB23_11 ; RV32-NEXT: .LBB23_10: # %entry -; RV32-NEXT: mv a5, a4 +; RV32-NEXT: mv a5, a3 ; RV32-NEXT: .LBB23_11: # %entry -; RV32-NEXT: seqz a0, a5 -; RV32-NEXT: addi a2, a0, -1 -; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: neg a2, a5 +; RV32-NEXT: and a0, a2, a4 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -1683,23 +1659,21 @@ define i64 @ustest_f32i64(float %x) { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixsfti@plt -; RV64-NEXT: mv a2, a1 +; RV64-NEXT: slti a2, a1, 1 ; RV64-NEXT: blez a1, .LBB23_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a2, 1 +; RV64-NEXT: li a1, 1 ; RV64-NEXT: .LBB23_2: # %entry -; RV64-NEXT: sgtz a1, a1 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: beqz a2, .LBB23_4 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: and a0, a2, a0 +; 
RV64-NEXT: beqz a1, .LBB23_4 ; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: sgtz a1, a1 ; RV64-NEXT: j .LBB23_5 ; RV64-NEXT: .LBB23_4: ; RV64-NEXT: snez a1, a0 ; RV64-NEXT: .LBB23_5: # %entry -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: neg a1, a1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 @@ -1744,9 +1718,8 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: .LBB24_3: # %entry ; RV32-NEXT: slti a6, a0, 0 ; RV32-NEXT: .LBB24_4: # %entry -; RV32-NEXT: seqz t0, a6 -; RV32-NEXT: addi a7, t0, -1 -; RV32-NEXT: neg t0, t0 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 ; RV32-NEXT: bnez a6, .LBB24_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 @@ -1772,8 +1745,7 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: # %bb.12: # %entry ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB24_13: # %entry -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a0, a0, a4 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -1797,8 +1769,7 @@ define i64 @stest_f16i64(half %x) { ; RV64-NEXT: .LBB24_2: ; RV64-NEXT: sltu a4, a0, a3 ; RV64-NEXT: .LBB24_3: # %entry -; RV64-NEXT: seqz a5, a4 -; RV64-NEXT: addi a5, a5, -1 +; RV64-NEXT: neg a5, a4 ; RV64-NEXT: and a5, a5, a1 ; RV64-NEXT: bnez a4, .LBB24_5 ; RV64-NEXT: # %bb.4: # %entry @@ -1854,8 +1825,7 @@ define i64 @utesth_f16i64(half %x) { ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: neg a1, a0 ; RV32-NEXT: and a0, a1, a3 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1905,45 +1875,43 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: .LBB26_2: ; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: .LBB26_3: # %entry -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: xori a3, a0, 1 -; RV32-NEXT: or a3, a3, a1 -; RV32-NEXT: seqz a3, a3 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: seqz a3, a2 -; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: lw a3, 12(sp) +; RV32-NEXT: xori a4, a0, 1 +; RV32-NEXT: or a4, a4, a1 +; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a2, a4, a2 +; RV32-NEXT: neg a4, a2 ; RV32-NEXT: bnez a2, .LBB26_5 ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB26_5: # %entry ; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: and a2, a3, a1 -; RV32-NEXT: and a1, a3, a4 +; RV32-NEXT: and a2, a4, a1 +; RV32-NEXT: and a1, a4, a3 ; RV32-NEXT: beqz a2, .LBB26_8 ; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: sgtz a4, a2 -; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: sgtz a3, a2 +; RV32-NEXT: and a4, a4, a5 ; RV32-NEXT: bnez a1, .LBB26_9 ; RV32-NEXT: .LBB26_7: -; RV32-NEXT: snez a5, a3 +; RV32-NEXT: snez a5, a4 ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: bnez a0, .LBB26_10 ; RV32-NEXT: j .LBB26_11 ; RV32-NEXT: .LBB26_8: -; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: snez a3, a0 +; RV32-NEXT: and a4, a4, a5 ; RV32-NEXT: beqz a1, .LBB26_7 ; RV32-NEXT: .LBB26_9: # %entry ; RV32-NEXT: snez a5, a1 ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: beqz a0, .LBB26_11 ; RV32-NEXT: .LBB26_10: # %entry -; RV32-NEXT: mv a5, a4 +; RV32-NEXT: mv a5, a3 ; RV32-NEXT: .LBB26_11: # %entry -; RV32-NEXT: seqz a0, a5 -; RV32-NEXT: addi a2, a0, -1 -; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: neg a2, a5 +; RV32-NEXT: and a0, a2, a4 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; 
RV32-NEXT: addi sp, sp, 32 @@ -1958,23 +1926,21 @@ define i64 @ustest_f16i64(half %x) { ; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixsfti@plt -; RV64-NEXT: mv a2, a1 +; RV64-NEXT: slti a2, a1, 1 ; RV64-NEXT: blez a1, .LBB26_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: li a2, 1 +; RV64-NEXT: li a1, 1 ; RV64-NEXT: .LBB26_2: # %entry -; RV64-NEXT: sgtz a1, a1 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: beqz a2, .LBB26_4 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: beqz a1, .LBB26_4 ; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: sgtz a1, a2 +; RV64-NEXT: sgtz a1, a1 ; RV64-NEXT: j .LBB26_5 ; RV64-NEXT: .LBB26_4: ; RV64-NEXT: snez a1, a0 ; RV64-NEXT: .LBB26_5: # %entry -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: neg a1, a1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 @@ -3272,8 +3238,7 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IF-NEXT: snez a5, a1 ; RV32IF-NEXT: addi a5, a5, -1 ; RV32IF-NEXT: and a4, a5, a4 -; RV32IF-NEXT: seqz a4, a4 -; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: neg a4, a4 ; RV32IF-NEXT: and a3, a4, a3 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: or a0, a0, a1 @@ -3320,8 +3285,7 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IFD-NEXT: snez a5, a1 ; RV32IFD-NEXT: addi a5, a5, -1 ; RV32IFD-NEXT: and a4, a5, a4 -; RV32IFD-NEXT: seqz a4, a4 -; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: neg a4, a4 ; RV32IFD-NEXT: and a3, a4, a3 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: or a0, a0, a1 @@ -3364,28 +3328,27 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: # %bb.3: # %entry ; RV32IF-NEXT: li a2, 1 ; RV32IF-NEXT: .LBB47_4: # %entry -; RV32IF-NEXT: lw a5, 12(sp) +; RV32IF-NEXT: lw a4, 12(sp) ; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: slti a4, a0, 0 +; RV32IF-NEXT: slti a5, a0, 0 ; RV32IF-NEXT: beqz a0, .LBB47_6 ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a2, a6 -; RV32IF-NEXT: mv a6, a4 +; RV32IF-NEXT: mv a6, a5 ; RV32IF-NEXT: j .LBB47_7 ; RV32IF-NEXT: .LBB47_6: ; RV32IF-NEXT: seqz a6, a1 ; RV32IF-NEXT: .LBB47_7: # %entry -; RV32IF-NEXT: seqz a6, a6 -; RV32IF-NEXT: addi a6, a6, -1 +; RV32IF-NEXT: neg a6, a6 ; RV32IF-NEXT: and a3, a6, a3 ; RV32IF-NEXT: xori a1, a1, 1 ; RV32IF-NEXT: or a1, a1, a0 ; RV32IF-NEXT: seqz a1, a1 ; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: and a3, a1, a3 -; RV32IF-NEXT: and a5, a6, a5 -; RV32IF-NEXT: and a1, a1, a5 -; RV32IF-NEXT: neg a4, a4 +; RV32IF-NEXT: and a4, a6, a4 +; RV32IF-NEXT: and a1, a1, a4 +; RV32IF-NEXT: neg a4, a5 ; RV32IF-NEXT: and a4, a4, a0 ; RV32IF-NEXT: mv a0, a3 ; RV32IF-NEXT: beqz a1, .LBB47_9 @@ -3405,8 +3368,7 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: or a2, a2, a4 ; RV32IF-NEXT: beqz a2, .LBB47_13 ; RV32IF-NEXT: .LBB47_12: # %entry -; RV32IF-NEXT: seqz a0, a5 -; RV32IF-NEXT: addi a2, a0, -1 +; RV32IF-NEXT: neg a2, a5 ; RV32IF-NEXT: and a0, a2, a3 ; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: .LBB47_13: # %entry @@ -3426,8 +3388,8 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB47_2: # %entry -; RV64-NEXT: sgtz a3, a1 -; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: slti a3, a1, 1 +; RV64-NEXT: neg a3, a3 ; RV64-NEXT: and a0, a3, a0 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: seqz a1, a1 @@ -3464,28 +3426,27 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: # %bb.3: # %entry ; RV32IFD-NEXT: li a2, 1 ; RV32IFD-NEXT: .LBB47_4: # %entry 
-; RV32IFD-NEXT: lw a5, 12(sp) +; RV32IFD-NEXT: lw a4, 12(sp) ; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: slti a4, a0, 0 +; RV32IFD-NEXT: slti a5, a0, 0 ; RV32IFD-NEXT: beqz a0, .LBB47_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a2, a6 -; RV32IFD-NEXT: mv a6, a4 +; RV32IFD-NEXT: mv a6, a5 ; RV32IFD-NEXT: j .LBB47_7 ; RV32IFD-NEXT: .LBB47_6: ; RV32IFD-NEXT: seqz a6, a1 ; RV32IFD-NEXT: .LBB47_7: # %entry -; RV32IFD-NEXT: seqz a6, a6 -; RV32IFD-NEXT: addi a6, a6, -1 +; RV32IFD-NEXT: neg a6, a6 ; RV32IFD-NEXT: and a3, a6, a3 ; RV32IFD-NEXT: xori a1, a1, 1 ; RV32IFD-NEXT: or a1, a1, a0 ; RV32IFD-NEXT: seqz a1, a1 ; RV32IFD-NEXT: addi a1, a1, -1 ; RV32IFD-NEXT: and a3, a1, a3 -; RV32IFD-NEXT: and a5, a6, a5 -; RV32IFD-NEXT: and a1, a1, a5 -; RV32IFD-NEXT: neg a4, a4 +; RV32IFD-NEXT: and a4, a6, a4 +; RV32IFD-NEXT: and a1, a1, a4 +; RV32IFD-NEXT: neg a4, a5 ; RV32IFD-NEXT: and a4, a4, a0 ; RV32IFD-NEXT: mv a0, a3 ; RV32IFD-NEXT: beqz a1, .LBB47_9 @@ -3505,8 +3466,7 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: or a2, a2, a4 ; RV32IFD-NEXT: beqz a2, .LBB47_13 ; RV32IFD-NEXT: .LBB47_12: # %entry -; RV32IFD-NEXT: seqz a0, a5 -; RV32IFD-NEXT: addi a2, a0, -1 +; RV32IFD-NEXT: neg a2, a5 ; RV32IFD-NEXT: and a0, a2, a3 ; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: .LBB47_13: # %entry @@ -3645,8 +3605,7 @@ define i64 @utest_f32i64_mm(float %x) { ; RV32-NEXT: snez a5, a1 ; RV32-NEXT: addi a5, a5, -1 ; RV32-NEXT: and a4, a5, a4 -; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: neg a4, a4 ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 @@ -3705,28 +3664,27 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: # %bb.3: # %entry ; RV32-NEXT: li a2, 1 ; RV32-NEXT: .LBB50_4: # %entry -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: slti a5, a0, 0 ; RV32-NEXT: beqz a0, .LBB50_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a2, a6 -; RV32-NEXT: mv a6, a4 +; RV32-NEXT: mv a6, a5 ; RV32-NEXT: j .LBB50_7 ; RV32-NEXT: .LBB50_6: ; RV32-NEXT: seqz a6, a1 ; RV32-NEXT: .LBB50_7: # %entry -; RV32-NEXT: seqz a6, a6 -; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: neg a6, a6 ; RV32-NEXT: and a3, a6, a3 ; RV32-NEXT: xori a1, a1, 1 ; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: seqz a1, a1 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a3, a1, a3 -; RV32-NEXT: and a5, a6, a5 -; RV32-NEXT: and a1, a1, a5 -; RV32-NEXT: neg a4, a4 +; RV32-NEXT: and a4, a6, a4 +; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: neg a4, a5 ; RV32-NEXT: and a4, a4, a0 ; RV32-NEXT: mv a0, a3 ; RV32-NEXT: beqz a1, .LBB50_9 @@ -3746,8 +3704,7 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: or a2, a2, a4 ; RV32-NEXT: beqz a2, .LBB50_13 ; RV32-NEXT: .LBB50_12: # %entry -; RV32-NEXT: seqz a0, a5 -; RV32-NEXT: addi a2, a0, -1 +; RV32-NEXT: neg a2, a5 ; RV32-NEXT: and a0, a2, a3 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: .LBB50_13: # %entry @@ -3767,8 +3724,8 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB50_2: # %entry -; RV64-NEXT: sgtz a3, a1 -; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: slti a3, a1, 1 +; RV64-NEXT: neg a3, a3 ; RV64-NEXT: and a0, a3, a0 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: seqz a1, a1 @@ -3961,8 +3918,7 @@ define i64 @utesth_f16i64_mm(half %x) { ; RV32-NEXT: snez a5, a1 ; RV32-NEXT: addi a5, a5, -1 ; RV32-NEXT: and a4, a5, a4 -; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: neg a4, a4 ; RV32-NEXT: 
and a3, a4, a3 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 @@ -4025,28 +3981,27 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: # %bb.3: # %entry ; RV32-NEXT: li a2, 1 ; RV32-NEXT: .LBB53_4: # %entry -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: slti a5, a0, 0 ; RV32-NEXT: beqz a0, .LBB53_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a2, a6 -; RV32-NEXT: mv a6, a4 +; RV32-NEXT: mv a6, a5 ; RV32-NEXT: j .LBB53_7 ; RV32-NEXT: .LBB53_6: ; RV32-NEXT: seqz a6, a1 ; RV32-NEXT: .LBB53_7: # %entry -; RV32-NEXT: seqz a6, a6 -; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: neg a6, a6 ; RV32-NEXT: and a3, a6, a3 ; RV32-NEXT: xori a1, a1, 1 ; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: seqz a1, a1 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a3, a1, a3 -; RV32-NEXT: and a5, a6, a5 -; RV32-NEXT: and a1, a1, a5 -; RV32-NEXT: neg a4, a4 +; RV32-NEXT: and a4, a6, a4 +; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: neg a4, a5 ; RV32-NEXT: and a4, a4, a0 ; RV32-NEXT: mv a0, a3 ; RV32-NEXT: beqz a1, .LBB53_9 @@ -4066,8 +4021,7 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: or a2, a2, a4 ; RV32-NEXT: beqz a2, .LBB53_13 ; RV32-NEXT: .LBB53_12: # %entry -; RV32-NEXT: seqz a0, a5 -; RV32-NEXT: addi a2, a0, -1 +; RV32-NEXT: neg a2, a5 ; RV32-NEXT: and a0, a2, a3 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: .LBB53_13: # %entry @@ -4089,8 +4043,8 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB53_2: # %entry -; RV64-NEXT: sgtz a3, a1 -; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: slti a3, a1, 1 +; RV64-NEXT: neg a3, a3 ; RV64-NEXT: and a0, a3, a0 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: seqz a1, a1 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll index 6601da392632..fbab3376941d 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -96,22 +96,22 @@ entry: define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a2, a0, 32 ; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz -; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2 +; CHECK-NOV-NEXT: li a1, -1 +; CHECK-NOV-NEXT: srli a2, a1, 32 +; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz +; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: .LBB2_2: # %entry -; CHECK-NOV-NEXT: blt a0, a2, .LBB2_4 -; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: .LBB2_2: # %entry +; CHECK-NOV-NEXT: blt a1, a2, .LBB2_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB2_4: # %entry -; CHECK-NOV-NEXT: sgtz a2, a0 -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: sgtz a2, a1 +; CHECK-NOV-NEXT: sgtz a3, a0 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a0, a3, a0 ; CHECK-NOV-NEXT: neg a2, a2 ; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ret @@ -275,16 +275,16 @@ entry: define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a1, fa0, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a4, a2, 32 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a2, fa1, rtz ; CHECK-NOV-NEXT: bge a1, a4, 
.LBB5_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7 ; CHECK-NOV-NEXT: .LBB5_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8 ; CHECK-NOV-NEXT: .LBB5_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5 @@ -292,29 +292,29 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB5_5: # %entry ; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: sgtz a6, a3 +; CHECK-NOV-NEXT: sgtz a7, a2 +; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: neg t0, t0 +; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: and a3, a6, a3 ; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sgtz a5, a3 -; CHECK-NOV-NEXT: neg a5, a5 -; CHECK-NOV-NEXT: and a3, a5, a3 -; CHECK-NOV-NEXT: sgtz a5, a2 -; CHECK-NOV-NEXT: neg a5, a5 -; CHECK-NOV-NEXT: and a2, a5, a2 -; CHECK-NOV-NEXT: sgtz a5, a1 -; CHECK-NOV-NEXT: neg a5, a5 -; CHECK-NOV-NEXT: and a1, a5, a1 -; CHECK-NOV-NEXT: sw a1, 12(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a3, 4(a0) -; CHECK-NOV-NEXT: sw a4, 0(a0) +; CHECK-NOV-NEXT: sw a4, 12(a0) +; CHECK-NOV-NEXT: sw a3, 8(a0) +; CHECK-NOV-NEXT: sw a2, 4(a0) +; CHECK-NOV-NEXT: sw a1, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB5_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2 ; CHECK-NOV-NEXT: .LBB5_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3 ; CHECK-NOV-NEXT: .LBB5_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -683,10 +683,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 24(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu a1, 16(a1) +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 24(a1) +; CHECK-NOV-NEXT: lhu s3, 16(a1) +; CHECK-NOV-NEXT: lhu a1, 8(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2@plt @@ -716,21 +716,21 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: mv a3, a2 ; CHECK-NOV-NEXT: .LBB8_5: # %entry ; CHECK-NOV-NEXT: sgtz a2, a3 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: sgtz a5, s2 +; CHECK-NOV-NEXT: sgtz a6, a0 +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: and a0, a6, a0 +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a5, a5, s2 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a1, a4, a1 ; CHECK-NOV-NEXT: neg a2, a2 ; CHECK-NOV-NEXT: and a2, a2, a3 -; CHECK-NOV-NEXT: sgtz a3, a1 -; CHECK-NOV-NEXT: neg a3, a3 -; CHECK-NOV-NEXT: and a1, a3, a1 -; CHECK-NOV-NEXT: sgtz a3, s2 -; CHECK-NOV-NEXT: neg a3, a3 -; CHECK-NOV-NEXT: and a3, a3, s2 -; CHECK-NOV-NEXT: sgtz a4, a0 -; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: sw a0, 12(s0) -; CHECK-NOV-NEXT: sw a3, 8(s0) -; CHECK-NOV-NEXT: sw a1, 4(s0) -; CHECK-NOV-NEXT: sw a2, 0(s0) +; CHECK-NOV-NEXT: sw a2, 12(s0) +; CHECK-NOV-NEXT: sw a1, 8(s0) +; CHECK-NOV-NEXT: sw a5, 4(s0) +; CHECK-NOV-NEXT: sw a0, 0(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte 
Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -920,22 +920,22 @@ entry: define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz -; CHECK-NOV-NEXT: lui a0, 16 -; CHECK-NOV-NEXT: addiw a2, a0, -1 ; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz -; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2 +; CHECK-NOV-NEXT: lui a1, 16 +; CHECK-NOV-NEXT: addiw a2, a1, -1 +; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz +; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: .LBB11_2: # %entry -; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4 -; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: .LBB11_2: # %entry +; CHECK-NOV-NEXT: blt a1, a2, .LBB11_4 +; CHECK-NOV-NEXT: # %bb.3: # %entry +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB11_4: # %entry -; CHECK-NOV-NEXT: sgtz a2, a0 -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a0, a2, a0 ; CHECK-NOV-NEXT: sgtz a2, a1 +; CHECK-NOV-NEXT: sgtz a3, a0 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a0, a3, a0 ; CHECK-NOV-NEXT: neg a2, a2 ; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ret @@ -1100,16 +1100,16 @@ entry: define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a1, fa0, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a4, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a2, fa1, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7 ; CHECK-NOV-NEXT: .LBB14_2: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8 ; CHECK-NOV-NEXT: .LBB14_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5 @@ -1117,29 +1117,29 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB14_5: # %entry ; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: sgtz a6, a3 +; CHECK-NOV-NEXT: sgtz a7, a2 +; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: neg t0, t0 +; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: neg a7, a7 +; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: and a3, a6, a3 ; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sgtz a5, a3 -; CHECK-NOV-NEXT: neg a5, a5 -; CHECK-NOV-NEXT: and a3, a5, a3 -; CHECK-NOV-NEXT: sgtz a5, a2 -; CHECK-NOV-NEXT: neg a5, a5 -; CHECK-NOV-NEXT: and a2, a5, a2 -; CHECK-NOV-NEXT: sgtz a5, a1 -; CHECK-NOV-NEXT: neg a5, a5 -; CHECK-NOV-NEXT: and a1, a5, a1 -; CHECK-NOV-NEXT: sh a1, 6(a0) -; CHECK-NOV-NEXT: sh a2, 4(a0) -; CHECK-NOV-NEXT: sh a3, 2(a0) -; CHECK-NOV-NEXT: sh a4, 0(a0) +; CHECK-NOV-NEXT: sh a4, 6(a0) +; CHECK-NOV-NEXT: sh a3, 4(a0) +; CHECK-NOV-NEXT: sh a2, 2(a0) +; CHECK-NOV-NEXT: sh a1, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB14_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2 ; CHECK-NOV-NEXT: .LBB14_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3 ; 
CHECK-NOV-NEXT: .LBB14_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -1794,14 +1794,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 56(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu s4, 16(a1) -; CHECK-NOV-NEXT: lhu s5, 24(a1) -; CHECK-NOV-NEXT: lhu s6, 32(a1) -; CHECK-NOV-NEXT: lhu s7, 40(a1) -; CHECK-NOV-NEXT: lhu a1, 48(a1) +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 56(a1) +; CHECK-NOV-NEXT: lhu s3, 48(a1) +; CHECK-NOV-NEXT: lhu s4, 40(a1) +; CHECK-NOV-NEXT: lhu s5, 32(a1) +; CHECK-NOV-NEXT: lhu s6, 24(a1) +; CHECK-NOV-NEXT: lhu s7, 16(a1) +; CHECK-NOV-NEXT: lhu a1, 8(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2@plt @@ -1855,37 +1855,37 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB17_9: # %entry ; CHECK-NOV-NEXT: sgtz a3, a7 +; CHECK-NOV-NEXT: sgtz t0, a6 +; CHECK-NOV-NEXT: sgtz t1, a5 +; CHECK-NOV-NEXT: sgtz t2, a4 +; CHECK-NOV-NEXT: sgtz t3, a2 +; CHECK-NOV-NEXT: sgtz t4, a1 +; CHECK-NOV-NEXT: sgtz t5, s2 +; CHECK-NOV-NEXT: sgtz t6, a0 +; CHECK-NOV-NEXT: neg t6, t6 +; CHECK-NOV-NEXT: and a0, t6, a0 +; CHECK-NOV-NEXT: neg t5, t5 +; CHECK-NOV-NEXT: and t5, t5, s2 +; CHECK-NOV-NEXT: neg t4, t4 +; CHECK-NOV-NEXT: and a1, t4, a1 +; CHECK-NOV-NEXT: neg t3, t3 +; CHECK-NOV-NEXT: and a2, t3, a2 +; CHECK-NOV-NEXT: neg t2, t2 +; CHECK-NOV-NEXT: and a4, t2, a4 +; CHECK-NOV-NEXT: neg t1, t1 +; CHECK-NOV-NEXT: and a5, t1, a5 +; CHECK-NOV-NEXT: neg t0, t0 +; CHECK-NOV-NEXT: and a6, t0, a6 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a7 -; CHECK-NOV-NEXT: sgtz a7, a6 -; CHECK-NOV-NEXT: neg a7, a7 -; CHECK-NOV-NEXT: and a6, a7, a6 -; CHECK-NOV-NEXT: sgtz a7, a5 -; CHECK-NOV-NEXT: neg a7, a7 -; CHECK-NOV-NEXT: and a5, a7, a5 -; CHECK-NOV-NEXT: sgtz a7, a4 -; CHECK-NOV-NEXT: neg a7, a7 -; CHECK-NOV-NEXT: and a4, a7, a4 -; CHECK-NOV-NEXT: sgtz a7, a2 -; CHECK-NOV-NEXT: neg a7, a7 -; CHECK-NOV-NEXT: and a2, a7, a2 -; CHECK-NOV-NEXT: sgtz a7, a1 -; CHECK-NOV-NEXT: neg a7, a7 -; CHECK-NOV-NEXT: and a1, a7, a1 -; CHECK-NOV-NEXT: sgtz a7, s2 -; CHECK-NOV-NEXT: neg a7, a7 -; CHECK-NOV-NEXT: and a7, a7, s2 -; CHECK-NOV-NEXT: sgtz t0, a0 -; CHECK-NOV-NEXT: neg t0, t0 -; CHECK-NOV-NEXT: and a0, t0, a0 -; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh a7, 12(s0) -; CHECK-NOV-NEXT: sh a1, 10(s0) -; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a4, 6(s0) -; CHECK-NOV-NEXT: sh a5, 4(s0) -; CHECK-NOV-NEXT: sh a6, 2(s0) -; CHECK-NOV-NEXT: sh a3, 0(s0) +; CHECK-NOV-NEXT: sh a3, 14(s0) +; CHECK-NOV-NEXT: sh a6, 12(s0) +; CHECK-NOV-NEXT: sh a5, 10(s0) +; CHECK-NOV-NEXT: sh a4, 8(s0) +; CHECK-NOV-NEXT: sh a2, 6(s0) +; CHECK-NOV-NEXT: sh a1, 4(s0) +; CHECK-NOV-NEXT: sh t5, 2(s0) +; CHECK-NOV-NEXT: sh a0, 0(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -2086,7 +2086,6 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: bnez s1, .LBB18_4 ; CHECK-NOV-NEXT: .LBB18_2: ; CHECK-NOV-NEXT: sltu a5, s0, a3 -; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: beqz a5, .LBB18_5 ; CHECK-NOV-NEXT: j .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_3: @@ -2094,14 +2093,12 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: beqz s1, .LBB18_2 ; 
CHECK-NOV-NEXT: .LBB18_4: # %entry ; CHECK-NOV-NEXT: slti a5, s1, 0 -; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: bnez a5, .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_5: # %entry ; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB18_6: # %entry -; CHECK-NOV-NEXT: addi a6, a6, -1 -; CHECK-NOV-NEXT: seqz a5, a4 -; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: neg a6, a5 +; CHECK-NOV-NEXT: neg a5, a4 ; CHECK-NOV-NEXT: and a5, a5, a1 ; CHECK-NOV-NEXT: bnez a4, .LBB18_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry @@ -2175,51 +2172,50 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: bnez a1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: ; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: j .LBB18_5 +; CHECK-V-NEXT: neg a6, a4 +; CHECK-V-NEXT: beqz a4, .LBB18_5 +; CHECK-V-NEXT: j .LBB18_6 ; CHECK-V-NEXT: .LBB18_3: ; CHECK-V-NEXT: sltu a4, s0, a3 ; CHECK-V-NEXT: beqz a1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry ; CHECK-V-NEXT: slti a5, a1, 0 +; CHECK-V-NEXT: neg a6, a4 +; CHECK-V-NEXT: bnez a4, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry -; CHECK-V-NEXT: seqz a6, a4 -; CHECK-V-NEXT: addi a6, a6, -1 -; CHECK-V-NEXT: bnez a4, .LBB18_7 -; CHECK-V-NEXT: # %bb.6: # %entry ; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: .LBB18_7: # %entry +; CHECK-V-NEXT: .LBB18_6: # %entry ; CHECK-V-NEXT: and a6, a6, s1 -; CHECK-V-NEXT: seqz a4, a5 -; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: bnez a5, .LBB18_9 -; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: neg a4, a5 +; CHECK-V-NEXT: bnez a5, .LBB18_8 +; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB18_9: # %entry +; CHECK-V-NEXT: .LBB18_8: # %entry ; CHECK-V-NEXT: and a4, a4, a1 ; CHECK-V-NEXT: slli a1, a2, 63 -; CHECK-V-NEXT: beq a6, a2, .LBB18_12 -; CHECK-V-NEXT: # %bb.10: # %entry +; CHECK-V-NEXT: beq a6, a2, .LBB18_11 +; CHECK-V-NEXT: # %bb.9: # %entry ; CHECK-V-NEXT: slti a3, a6, 0 ; CHECK-V-NEXT: xori a3, a3, 1 -; CHECK-V-NEXT: bne a4, a2, .LBB18_13 -; CHECK-V-NEXT: .LBB18_11: +; CHECK-V-NEXT: bne a4, a2, .LBB18_12 +; CHECK-V-NEXT: .LBB18_10: ; CHECK-V-NEXT: sltu a2, a1, a0 -; CHECK-V-NEXT: beqz a3, .LBB18_14 -; CHECK-V-NEXT: j .LBB18_15 -; CHECK-V-NEXT: .LBB18_12: +; CHECK-V-NEXT: beqz a3, .LBB18_13 +; CHECK-V-NEXT: j .LBB18_14 +; CHECK-V-NEXT: .LBB18_11: ; CHECK-V-NEXT: sltu a3, a1, s0 -; CHECK-V-NEXT: beq a4, a2, .LBB18_11 -; CHECK-V-NEXT: .LBB18_13: # %entry +; CHECK-V-NEXT: beq a4, a2, .LBB18_10 +; CHECK-V-NEXT: .LBB18_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 -; CHECK-V-NEXT: bnez a3, .LBB18_15 -; CHECK-V-NEXT: .LBB18_14: # %entry +; CHECK-V-NEXT: bnez a3, .LBB18_14 +; CHECK-V-NEXT: .LBB18_13: # %entry ; CHECK-V-NEXT: mv s0, a1 -; CHECK-V-NEXT: .LBB18_15: # %entry -; CHECK-V-NEXT: bnez a2, .LBB18_17 -; CHECK-V-NEXT: # %bb.16: # %entry +; CHECK-V-NEXT: .LBB18_14: # %entry +; CHECK-V-NEXT: bnez a2, .LBB18_16 +; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 -; CHECK-V-NEXT: .LBB18_17: # %entry +; CHECK-V-NEXT: .LBB18_16: # %entry ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd s0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 @@ -2260,19 +2256,19 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa0 -; CHECK-NOV-NEXT: fmv.d fa0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa1 ; CHECK-NOV-NEXT: call __fixunsdfti@plt ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti@plt ; 
CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez a2, s1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a2, a2, s0 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a0, a1, a0 -; CHECK-NOV-NEXT: snez a1, s1 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: and a1, a1, a0 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2307,9 +2303,9 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt ; CHECK-V-NEXT: snez a2, s1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) @@ -2350,49 +2346,45 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa0 -; CHECK-NOV-NEXT: fmv.d fa0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa1 ; CHECK-NOV-NEXT: call __fixdfti@plt ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti@plt -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: blez a1, .LBB20_2 +; CHECK-NOV-NEXT: mv a2, s1 +; CHECK-NOV-NEXT: blez s1, .LBB20_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB20_2: # %entry -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: blez s1, .LBB20_4 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: blez a1, .LBB20_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB20_4: # %entry -; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: addi a4, a1, -1 -; CHECK-NOV-NEXT: sgtz a1, s1 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, s0 -; CHECK-NOV-NEXT: beqz a3, .LBB20_7 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: beqz a1, .LBB20_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry -; CHECK-NOV-NEXT: sgtz a3, a3 -; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sgtz a1, a1 +; CHECK-NOV-NEXT: and a0, a4, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB20_8 ; CHECK-NOV-NEXT: .LBB20_6: ; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB20_9 ; CHECK-NOV-NEXT: .LBB20_7: -; CHECK-NOV-NEXT: snez a3, a1 -; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: snez a1, a3 +; CHECK-NOV-NEXT: and a0, a4, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB20_6 ; CHECK-NOV-NEXT: .LBB20_8: # %entry ; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB20_9: # %entry -; CHECK-NOV-NEXT: seqz a2, a2 -; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: neg a2, a2 ; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: seqz a2, a3 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a1, a2, a1 +; CHECK-NOV-NEXT: neg a1, a1 +; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2431,14 +2423,14 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB20_2: # %entry -; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: slti a3, s0, 1 +; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: 
blez s0, .LBB20_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB20_4: # %entry -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: sgtz a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB20_7 ; CHECK-V-NEXT: # %bb.5: # %entry @@ -2455,11 +2447,9 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: .LBB20_8: # %entry ; CHECK-V-NEXT: sgtz a3, s0 ; CHECK-V-NEXT: .LBB20_9: # %entry -; CHECK-V-NEXT: seqz a3, a3 -; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a2, a3, a2 -; CHECK-V-NEXT: seqz a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd a2, 32(sp) @@ -2516,7 +2506,6 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: bnez s1, .LBB21_4 ; CHECK-NOV-NEXT: .LBB21_2: ; CHECK-NOV-NEXT: sltu a5, s0, a3 -; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: beqz a5, .LBB21_5 ; CHECK-NOV-NEXT: j .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_3: @@ -2524,14 +2513,12 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: beqz s1, .LBB21_2 ; CHECK-NOV-NEXT: .LBB21_4: # %entry ; CHECK-NOV-NEXT: slti a5, s1, 0 -; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: bnez a5, .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_5: # %entry ; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB21_6: # %entry -; CHECK-NOV-NEXT: addi a6, a6, -1 -; CHECK-NOV-NEXT: seqz a5, a4 -; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: neg a6, a5 +; CHECK-NOV-NEXT: neg a5, a4 ; CHECK-NOV-NEXT: and a5, a5, a1 ; CHECK-NOV-NEXT: bnez a4, .LBB21_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry @@ -2605,51 +2592,50 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: bnez a1, .LBB21_4 ; CHECK-V-NEXT: .LBB21_2: ; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: j .LBB21_5 +; CHECK-V-NEXT: neg a6, a4 +; CHECK-V-NEXT: beqz a4, .LBB21_5 +; CHECK-V-NEXT: j .LBB21_6 ; CHECK-V-NEXT: .LBB21_3: ; CHECK-V-NEXT: sltu a4, s0, a3 ; CHECK-V-NEXT: beqz a1, .LBB21_2 ; CHECK-V-NEXT: .LBB21_4: # %entry ; CHECK-V-NEXT: slti a5, a1, 0 +; CHECK-V-NEXT: neg a6, a4 +; CHECK-V-NEXT: bnez a4, .LBB21_6 ; CHECK-V-NEXT: .LBB21_5: # %entry -; CHECK-V-NEXT: seqz a6, a4 -; CHECK-V-NEXT: addi a6, a6, -1 -; CHECK-V-NEXT: bnez a4, .LBB21_7 -; CHECK-V-NEXT: # %bb.6: # %entry ; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: .LBB21_7: # %entry +; CHECK-V-NEXT: .LBB21_6: # %entry ; CHECK-V-NEXT: and a6, a6, s1 -; CHECK-V-NEXT: seqz a4, a5 -; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: bnez a5, .LBB21_9 -; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: neg a4, a5 +; CHECK-V-NEXT: bnez a5, .LBB21_8 +; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB21_9: # %entry +; CHECK-V-NEXT: .LBB21_8: # %entry ; CHECK-V-NEXT: and a4, a4, a1 ; CHECK-V-NEXT: slli a1, a2, 63 -; CHECK-V-NEXT: beq a6, a2, .LBB21_12 -; CHECK-V-NEXT: # %bb.10: # %entry +; CHECK-V-NEXT: beq a6, a2, .LBB21_11 +; CHECK-V-NEXT: # %bb.9: # %entry ; CHECK-V-NEXT: slti a3, a6, 0 ; CHECK-V-NEXT: xori a3, a3, 1 -; CHECK-V-NEXT: bne a4, a2, .LBB21_13 -; CHECK-V-NEXT: .LBB21_11: +; CHECK-V-NEXT: bne a4, a2, .LBB21_12 +; CHECK-V-NEXT: .LBB21_10: ; CHECK-V-NEXT: sltu a2, a1, a0 -; CHECK-V-NEXT: beqz a3, .LBB21_14 -; CHECK-V-NEXT: j .LBB21_15 -; CHECK-V-NEXT: .LBB21_12: +; CHECK-V-NEXT: beqz a3, .LBB21_13 +; CHECK-V-NEXT: j .LBB21_14 +; CHECK-V-NEXT: .LBB21_11: ; CHECK-V-NEXT: sltu a3, a1, s0 -; CHECK-V-NEXT: beq a4, 
a2, .LBB21_11 -; CHECK-V-NEXT: .LBB21_13: # %entry +; CHECK-V-NEXT: beq a4, a2, .LBB21_10 +; CHECK-V-NEXT: .LBB21_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 -; CHECK-V-NEXT: bnez a3, .LBB21_15 -; CHECK-V-NEXT: .LBB21_14: # %entry +; CHECK-V-NEXT: bnez a3, .LBB21_14 +; CHECK-V-NEXT: .LBB21_13: # %entry ; CHECK-V-NEXT: mv s0, a1 -; CHECK-V-NEXT: .LBB21_15: # %entry -; CHECK-V-NEXT: bnez a2, .LBB21_17 -; CHECK-V-NEXT: # %bb.16: # %entry +; CHECK-V-NEXT: .LBB21_14: # %entry +; CHECK-V-NEXT: bnez a2, .LBB21_16 +; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 -; CHECK-V-NEXT: .LBB21_17: # %entry +; CHECK-V-NEXT: .LBB21_16: # %entry ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd s0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 @@ -2690,19 +2676,19 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.s fa0, fa1 +; CHECK-NOV-NEXT: fmv.s fs0, fa1 ; CHECK-NOV-NEXT: call __fixunssfti@plt ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti@plt ; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez a2, s1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a2, a2, s0 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a0, a1, a0 -; CHECK-NOV-NEXT: snez a1, s1 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: and a1, a1, a0 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2737,9 +2723,9 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt ; CHECK-V-NEXT: snez a2, s1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) @@ -2780,49 +2766,45 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.s fa0, fa1 +; CHECK-NOV-NEXT: fmv.s fs0, fa1 ; CHECK-NOV-NEXT: call __fixsfti@plt ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti@plt -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: blez a1, .LBB23_2 +; CHECK-NOV-NEXT: mv a2, s1 +; CHECK-NOV-NEXT: blez s1, .LBB23_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB23_2: # %entry -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: blez s1, .LBB23_4 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: blez a1, .LBB23_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB23_4: # %entry -; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: addi a4, a1, -1 -; CHECK-NOV-NEXT: sgtz a1, s1 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, s0 -; CHECK-NOV-NEXT: beqz a3, .LBB23_7 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: beqz a1, .LBB23_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry -; CHECK-NOV-NEXT: sgtz a3, a3 -; CHECK-NOV-NEXT: and a0, 
a4, a0 +; CHECK-NOV-NEXT: sgtz a1, a1 +; CHECK-NOV-NEXT: and a0, a4, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB23_8 ; CHECK-NOV-NEXT: .LBB23_6: ; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB23_9 ; CHECK-NOV-NEXT: .LBB23_7: -; CHECK-NOV-NEXT: snez a3, a1 -; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: snez a1, a3 +; CHECK-NOV-NEXT: and a0, a4, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB23_6 ; CHECK-NOV-NEXT: .LBB23_8: # %entry ; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB23_9: # %entry -; CHECK-NOV-NEXT: seqz a2, a2 -; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: neg a2, a2 ; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: seqz a2, a3 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a1, a2, a1 +; CHECK-NOV-NEXT: neg a1, a1 +; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2861,14 +2843,14 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB23_2: # %entry -; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: slti a3, s0, 1 +; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: blez s0, .LBB23_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB23_4: # %entry -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: sgtz a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB23_7 ; CHECK-V-NEXT: # %bb.5: # %entry @@ -2885,11 +2867,9 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: .LBB23_8: # %entry ; CHECK-V-NEXT: sgtz a3, s0 ; CHECK-V-NEXT: .LBB23_9: # %entry -; CHECK-V-NEXT: seqz a3, a3 -; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a2, a3, a2 -; CHECK-V-NEXT: seqz a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) ; CHECK-V-NEXT: sd a2, 32(sp) @@ -2948,7 +2928,6 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: bnez s1, .LBB24_4 ; CHECK-NOV-NEXT: .LBB24_2: ; CHECK-NOV-NEXT: sltu a5, s0, a3 -; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: beqz a5, .LBB24_5 ; CHECK-NOV-NEXT: j .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_3: @@ -2956,14 +2935,12 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: beqz s1, .LBB24_2 ; CHECK-NOV-NEXT: .LBB24_4: # %entry ; CHECK-NOV-NEXT: slti a5, s1, 0 -; CHECK-NOV-NEXT: seqz a6, a5 ; CHECK-NOV-NEXT: bnez a5, .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_5: # %entry ; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB24_6: # %entry -; CHECK-NOV-NEXT: addi a6, a6, -1 -; CHECK-NOV-NEXT: seqz a5, a4 -; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: neg a6, a5 +; CHECK-NOV-NEXT: neg a5, a4 ; CHECK-NOV-NEXT: and a5, a5, a1 ; CHECK-NOV-NEXT: bnez a4, .LBB24_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry @@ -3031,7 +3008,6 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: bnez s1, .LBB24_4 ; CHECK-V-NEXT: .LBB24_2: ; CHECK-V-NEXT: sltu a5, s0, a3 -; CHECK-V-NEXT: seqz a6, a5 ; CHECK-V-NEXT: beqz a5, .LBB24_5 ; CHECK-V-NEXT: j .LBB24_6 ; CHECK-V-NEXT: .LBB24_3: @@ -3039,14 +3015,12 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: beqz s1, .LBB24_2 ; CHECK-V-NEXT: .LBB24_4: # %entry ; CHECK-V-NEXT: slti a5, s1, 0 -; CHECK-V-NEXT: seqz a6, a5 ; CHECK-V-NEXT: bnez a5, .LBB24_6 ; CHECK-V-NEXT: .LBB24_5: # %entry ; CHECK-V-NEXT: mv s0, a3 ; 
CHECK-V-NEXT: .LBB24_6: # %entry -; CHECK-V-NEXT: addi a6, a6, -1 -; CHECK-V-NEXT: seqz a5, a4 -; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: neg a6, a5 +; CHECK-V-NEXT: neg a5, a4 ; CHECK-V-NEXT: and a5, a5, a1 ; CHECK-V-NEXT: bnez a4, .LBB24_8 ; CHECK-V-NEXT: # %bb.7: # %entry @@ -3115,8 +3089,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: mv a0, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt ; CHECK-NOV-NEXT: mv s1, a0 @@ -3125,11 +3098,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt ; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez a2, s2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a2, a2, s1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a0, a1, a0 -; CHECK-NOV-NEXT: snez a1, s2 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, s1 +; CHECK-NOV-NEXT: and a1, a1, a0 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3149,8 +3123,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s0, a0 -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt ; CHECK-V-NEXT: mv s1, a0 @@ -3159,13 +3132,13 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt ; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: snez a2, s2 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: snez a1, s2 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a1, a1, s1 -; CHECK-V-NEXT: sd a1, 8(sp) -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: sd a0, 8(sp) +; CHECK-V-NEXT: sd a2, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v9, (a0) @@ -3200,8 +3173,7 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s2, a0 -; CHECK-NOV-NEXT: mv a0, a1 +; CHECK-NOV-NEXT: mv s2, a1 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixsfti@plt ; CHECK-NOV-NEXT: mv s0, a0 @@ -3209,42 +3181,39 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: mv a0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixsfti@plt -; CHECK-NOV-NEXT: mv a2, a1 -; CHECK-NOV-NEXT: blez a1, .LBB26_2 +; CHECK-NOV-NEXT: mv a2, s1 +; CHECK-NOV-NEXT: blez s1, .LBB26_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB26_2: # %entry -; CHECK-NOV-NEXT: mv a3, s1 -; CHECK-NOV-NEXT: blez s1, .LBB26_4 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: slti a4, s1, 1 +; CHECK-NOV-NEXT: blez a1, .LBB26_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB26_4: # %entry -; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: addi a4, a1, -1 -; CHECK-NOV-NEXT: sgtz a1, s1 
-; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, s0 -; CHECK-NOV-NEXT: beqz a3, .LBB26_7 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: beqz a1, .LBB26_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry -; CHECK-NOV-NEXT: sgtz a3, a3 -; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sgtz a1, a1 +; CHECK-NOV-NEXT: and a0, a4, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB26_8 ; CHECK-NOV-NEXT: .LBB26_6: ; CHECK-NOV-NEXT: snez a2, a0 ; CHECK-NOV-NEXT: j .LBB26_9 ; CHECK-NOV-NEXT: .LBB26_7: -; CHECK-NOV-NEXT: snez a3, a1 -; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: snez a1, a3 +; CHECK-NOV-NEXT: and a0, a4, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB26_6 ; CHECK-NOV-NEXT: .LBB26_8: # %entry ; CHECK-NOV-NEXT: sgtz a2, a2 ; CHECK-NOV-NEXT: .LBB26_9: # %entry -; CHECK-NOV-NEXT: seqz a2, a2 -; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: neg a2, a2 ; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: seqz a2, a3 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a1, a2, a1 +; CHECK-NOV-NEXT: neg a1, a1 +; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3264,8 +3233,7 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s2, a0 -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s2, a1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: mv s0, a0 @@ -3273,44 +3241,41 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: blez a1, .LBB26_2 +; CHECK-V-NEXT: mv a2, s1 +; CHECK-V-NEXT: blez s1, .LBB26_2 ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB26_2: # %entry -; CHECK-V-NEXT: mv a3, s1 -; CHECK-V-NEXT: blez s1, .LBB26_4 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: blez a1, .LBB26_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB26_4: # %entry -; CHECK-V-NEXT: sgtz a1, a1 -; CHECK-V-NEXT: addi a4, a1, -1 -; CHECK-V-NEXT: sgtz a1, s1 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a1, a1, s0 -; CHECK-V-NEXT: beqz a3, .LBB26_7 -; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a3, a3 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: beqz a1, .LBB26_7 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: and a3, a3, s0 ; CHECK-V-NEXT: bnez a2, .LBB26_8 ; CHECK-V-NEXT: .LBB26_6: -; CHECK-V-NEXT: snez a2, a0 +; CHECK-V-NEXT: snez a2, a3 ; CHECK-V-NEXT: j .LBB26_9 ; CHECK-V-NEXT: .LBB26_7: -; CHECK-V-NEXT: snez a3, a1 -; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: snez a1, a0 +; CHECK-V-NEXT: and a3, a3, s0 ; CHECK-V-NEXT: beqz a2, .LBB26_6 ; CHECK-V-NEXT: .LBB26_8: # %entry ; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: .LBB26_9: # %entry -; CHECK-V-NEXT: seqz a2, a2 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a0, a2, a0 -; CHECK-V-NEXT: seqz a2, a3 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: sd a1, 8(sp) -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: neg a2, a2 +; CHECK-V-NEXT: and a2, a2, a3 +; CHECK-V-NEXT: neg a1, a1 +; 
CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: sd a0, 8(sp) +; CHECK-V-NEXT: sd a2, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v9, (a0) @@ -5726,14 +5691,14 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB47_4: # %entry -; CHECK-NOV-NEXT: sgtz a3, a1 -; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 ; CHECK-NOV-NEXT: addi a0, a1, -1 ; CHECK-NOV-NEXT: seqz a0, a0 ; CHECK-NOV-NEXT: addi a1, a0, -1 -; CHECK-NOV-NEXT: sgtz a0, s1 -; CHECK-NOV-NEXT: addi a0, a0, -1 +; CHECK-NOV-NEXT: slti a0, s1, 1 +; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi a5, s1, -1 ; CHECK-NOV-NEXT: seqz a5, a5 @@ -5790,14 +5755,14 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB47_2: # %entry -; CHECK-V-NEXT: sgtz a3, s0 -; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: slti a3, s0, 1 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a3, a3, s1 ; CHECK-V-NEXT: addi a4, s0, -1 ; CHECK-V-NEXT: seqz a4, a4 ; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: sgtz a5, a1 -; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: slti a5, a1, 1 +; CHECK-V-NEXT: neg a5, a5 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: seqz a6, a1 ; CHECK-V-NEXT: blez s0, .LBB47_4 @@ -6213,14 +6178,14 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB50_4: # %entry -; CHECK-NOV-NEXT: sgtz a3, a1 -; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 ; CHECK-NOV-NEXT: addi a0, a1, -1 ; CHECK-NOV-NEXT: seqz a0, a0 ; CHECK-NOV-NEXT: addi a1, a0, -1 -; CHECK-NOV-NEXT: sgtz a0, s1 -; CHECK-NOV-NEXT: addi a0, a0, -1 +; CHECK-NOV-NEXT: slti a0, s1, 1 +; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi a5, s1, -1 ; CHECK-NOV-NEXT: seqz a5, a5 @@ -6277,14 +6242,14 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB50_2: # %entry -; CHECK-V-NEXT: sgtz a3, s0 -; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: slti a3, s0, 1 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a3, a3, s1 ; CHECK-V-NEXT: addi a4, s0, -1 ; CHECK-V-NEXT: seqz a4, a4 ; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: sgtz a5, a1 -; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: slti a5, a1, 1 +; CHECK-V-NEXT: neg a5, a5 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: seqz a6, a1 ; CHECK-V-NEXT: blez s0, .LBB50_4 @@ -6691,14 +6656,14 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB53_4: # %entry -; CHECK-NOV-NEXT: sgtz a3, a1 -; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 ; CHECK-NOV-NEXT: addi a0, a1, -1 ; CHECK-NOV-NEXT: seqz a0, a0 ; CHECK-NOV-NEXT: addi a1, a0, -1 -; CHECK-NOV-NEXT: sgtz a0, s1 -; CHECK-NOV-NEXT: addi a0, a0, -1 +; CHECK-NOV-NEXT: slti a0, s1, 1 +; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi a5, s1, -1 ; CHECK-NOV-NEXT: seqz a5, a5 @@ -6754,14 +6719,14 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: 
# %bb.3: # %entry ; CHECK-V-NEXT: li a4, 1 ; CHECK-V-NEXT: .LBB53_4: # %entry -; CHECK-V-NEXT: sgtz a3, a1 -; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: slti a3, a1, 1 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a3, a3, a0 ; CHECK-V-NEXT: addi a0, a1, -1 ; CHECK-V-NEXT: seqz a0, a0 ; CHECK-V-NEXT: addi a1, a0, -1 -; CHECK-V-NEXT: sgtz a0, s1 -; CHECK-V-NEXT: addi a0, a0, -1 +; CHECK-V-NEXT: slti a0, s1, 1 +; CHECK-V-NEXT: neg a0, a0 ; CHECK-V-NEXT: and a0, a0, s0 ; CHECK-V-NEXT: addi a5, s1, -1 ; CHECK-V-NEXT: seqz a5, a5 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index fab430f1d9cd..9a8eeea4bdbc 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -724,24 +724,22 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a0, 325632 ; RV32I-NEXT: addi a1, a0, -1 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB8_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: .LBB8_2: # %start +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -877,11 +875,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: seqz a4, s0 -; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -927,11 +923,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IDZFH-NEXT: seqz a3, a3 ; RV32IDZFH-NEXT: addi a3, a3, -1 ; RV32IDZFH-NEXT: and a1, a3, a1 -; RV32IDZFH-NEXT: seqz a4, s0 -; RV32IDZFH-NEXT: addi a4, a4, -1 +; RV32IDZFH-NEXT: neg a2, a2 +; RV32IDZFH-NEXT: neg a4, s0 ; RV32IDZFH-NEXT: and a0, a4, a0 -; RV32IDZFH-NEXT: seqz a2, a2 -; RV32IDZFH-NEXT: addi a2, a2, -1 ; RV32IDZFH-NEXT: or a0, a2, a0 ; RV32IDZFH-NEXT: and a0, a3, a0 ; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -958,65 +952,62 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 913408 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfdi@plt -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui s4, 524288 +; RV32I-NEXT: lui s3, 524288 +; RV32I-NEXT: 
bltz s2, .LBB10_2 +; RV32I-NEXT: # %bb.1: # %start +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: .LBB10_2: # %start +; RV32I-NEXT: lui a0, 389120 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: blez a0, .LBB10_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: addi s3, s4, -1 +; RV32I-NEXT: .LBB10_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt ; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: addi s1, a0, -1 -; RV32I-NEXT: lui a0, 389120 -; RV32I-NEXT: addi s3, a0, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bgtz a0, .LBB10_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: slti a0, s4, 0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s5 -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: .LBB10_2: # %start +; RV32I-NEXT: and s3, a0, s3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s2, a0 ; RV32I-NEXT: lui a1, 913408 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: lui s5, 524288 -; RV32I-NEXT: lui s4, 524288 -; RV32I-NEXT: bltz a0, .LBB10_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s4, s2 -; RV32I-NEXT: .LBB10_4: # %start -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: blez a0, .LBB10_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: addi s4, s5, -1 -; RV32I-NEXT: .LBB10_6: # %start +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: or s1, s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1134,26 +1125,23 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI12_0)(a0) +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: flt.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: seqz a0, a0 -; RV32IZFH-NEXT: addi s0, a0, -1 -; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: fle.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s1, a0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: lui a2, %hi(.LCPI12_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI12_0)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: or a0, s0, a0 +; RV32IZFH-NEXT: and a1, s1, a1 +; RV32IZFH-NEXT: or a1, s0, a1 ; 
RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -1171,26 +1159,23 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IDZFH-NEXT: addi sp, sp, -16 ; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IDZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI12_0)(a0) +; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IDZFH-NEXT: flt.s a0, ft0, fa0 +; RV32IDZFH-NEXT: neg s0, a0 ; RV32IDZFH-NEXT: fmv.w.x ft0, zero -; RV32IDZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IDZFH-NEXT: seqz a0, a0 -; RV32IDZFH-NEXT: addi s0, a0, -1 -; RV32IDZFH-NEXT: fmv.s fa0, fs0 +; RV32IDZFH-NEXT: fle.s a0, ft0, fa0 +; RV32IDZFH-NEXT: neg s1, a0 ; RV32IDZFH-NEXT: call __fixunssfdi@plt -; RV32IDZFH-NEXT: lui a2, %hi(.LCPI12_0) -; RV32IDZFH-NEXT: flw ft0, %lo(.LCPI12_0)(a2) -; RV32IDZFH-NEXT: and a0, s0, a0 -; RV32IDZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IDZFH-NEXT: seqz a2, a2 -; RV32IDZFH-NEXT: addi a2, a2, -1 -; RV32IDZFH-NEXT: or a0, a2, a0 -; RV32IDZFH-NEXT: and a1, s0, a1 -; RV32IDZFH-NEXT: or a1, a2, a1 +; RV32IDZFH-NEXT: and a0, s1, a0 +; RV32IDZFH-NEXT: or a0, s0, a0 +; RV32IDZFH-NEXT: and a1, s1, a1 +; RV32IDZFH-NEXT: or a1, s0, a1 ; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IDZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: addi sp, sp, 16 ; RV32IDZFH-NEXT: ret ; @@ -1215,39 +1200,37 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a0, 391168 -; RV32I-NEXT: addi s2, a0, -1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __fixunssfdi@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB12_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s4 -; RV32I-NEXT: .LBB12_2: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s2, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB12_4 -; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: or s2, s2, s3 -; RV32I-NEXT: .LBB12_4: # %start +; RV32I-NEXT: addi s1, a0, -1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s3, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __gesf2@plt +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s4, a0, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixunssfdi@plt +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: and a0, s4, a0 +; RV32I-NEXT: or s3, s3, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: 
mv a0, s0 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __gesf2@plt +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: or a1, s1, a0 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1267,24 +1250,22 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a0, 391168 ; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: sgtz a0, a0 -; RV64I-NEXT: neg s0, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: neg s1, a0 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: bltz a0, .LBB12_2 -; RV64I-NEXT: # %bb.1: # %start -; RV64I-NEXT: or s0, s0, s1 -; RV64I-NEXT: .LBB12_2: # %start +; RV64I-NEXT: slti a0, a0, 0 +; RV64I-NEXT: addi s2, a0, -1 ; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __fixunssfdi@plt +; RV64I-NEXT: and a0, s2, a0 +; RV64I-NEXT: or a0, s1, a0 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2962,24 +2943,22 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a0, 325632 ; RV32I-NEXT: addi a1, a0, -1 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s0, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: bltz a0, .LBB39_2 -; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: .LBB39_2: # %start +; RV32I-NEXT: slti a0, a0, 0 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll index 7d3c4e051294..9f1dc2763ab5 100644 --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -50,11 +50,9 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: seqz a4, s0 -; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -96,29 +94,26 @@ define i64 @test_floor_ui64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte 
Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFH-NEXT: call floorf@plt -; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 +; RV32IZFH-NEXT: flt.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: seqz a0, a0 -; RV32IZFH-NEXT: addi s0, a0, -1 -; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: fle.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s1, a0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: or a0, s0, a0 +; RV32IZFH-NEXT: and a1, s1, a1 +; RV32IZFH-NEXT: or a1, s0, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -181,11 +176,9 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: seqz a4, s0 -; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -227,29 +220,26 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFH-NEXT: call ceilf@plt -; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 +; RV32IZFH-NEXT: flt.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: seqz a0, a0 -; RV32IZFH-NEXT: addi s0, a0, -1 -; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: fle.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s1, a0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI7_0)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: or a0, s0, a0 +; RV32IZFH-NEXT: and a1, s1, a1 +; RV32IZFH-NEXT: or a1, s0, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: 
addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -312,11 +302,9 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: seqz a4, s0 -; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -358,29 +346,26 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFH-NEXT: call truncf@plt -; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 +; RV32IZFH-NEXT: flt.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: seqz a0, a0 -; RV32IZFH-NEXT: addi s0, a0, -1 -; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: fle.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s1, a0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_0)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: or a0, s0, a0 +; RV32IZFH-NEXT: and a1, s1, a1 +; RV32IZFH-NEXT: or a1, s0, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -443,11 +428,9 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: seqz a4, s0 -; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -489,29 +472,26 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFH-NEXT: call roundf@plt -; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI15_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 +; RV32IZFH-NEXT: flt.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: seqz a0, a0 -; RV32IZFH-NEXT: addi s0, a0, -1 -; RV32IZFH-NEXT: fmv.s 
fa0, fs0 +; RV32IZFH-NEXT: fle.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s1, a0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI15_0)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: or a0, s0, a0 +; RV32IZFH-NEXT: and a1, s1, a1 +; RV32IZFH-NEXT: or a1, s0, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -574,11 +554,9 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFH-NEXT: seqz a3, a3 ; RV32IZFH-NEXT: addi a3, a3, -1 ; RV32IZFH-NEXT: and a1, a3, a1 -; RV32IZFH-NEXT: seqz a4, s0 -; RV32IZFH-NEXT: addi a4, a4, -1 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: neg a4, s0 ; RV32IZFH-NEXT: and a0, a4, a0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 ; RV32IZFH-NEXT: or a0, a2, a0 ; RV32IZFH-NEXT: and a0, a3, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -620,29 +598,26 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFH-NEXT: call roundevenf@plt -; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 -; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fa0, ft1 +; RV32IZFH-NEXT: flt.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.w.x ft0, zero -; RV32IZFH-NEXT: fle.s a0, ft0, fs0 -; RV32IZFH-NEXT: seqz a0, a0 -; RV32IZFH-NEXT: addi s0, a0, -1 -; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: fle.s a0, ft0, fa0 +; RV32IZFH-NEXT: neg s1, a0 ; RV32IZFH-NEXT: call __fixunssfdi@plt -; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFH-NEXT: flw ft0, %lo(.LCPI19_0)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, ft0, fs0 -; RV32IZFH-NEXT: seqz a2, a2 -; RV32IZFH-NEXT: addi a2, a2, -1 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: or a0, s0, a0 +; RV32IZFH-NEXT: and a1, s1, a1 +; RV32IZFH-NEXT: or a1, s0, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/min-max.ll b/llvm/test/CodeGen/RISCV/min-max.ll index bab4c7338be0..a7ac4a7e2a0c 100644 --- a/llvm/test/CodeGen/RISCV/min-max.ll +++ b/llvm/test/CodeGen/RISCV/min-max.ll @@ -665,7 +665,7 @@ define signext i32 @smax_i32_pos_constant_trailing_zeros(i32 signext %a) { define signext i32 @smin_i32_negone(i32 signext %a) { ; NOZBB-LABEL: smin_i32_negone: ; NOZBB: # %bb.0: -; NOZBB-NEXT: slti a1, a0, 0 +; NOZBB-NEXT: slti a1, a0, -1 ; NOZBB-NEXT: addi a1, a1, -1 ; NOZBB-NEXT: or a0, a1, a0 ; 
NOZBB-NEXT: ret @@ -682,21 +682,19 @@ define signext i32 @smin_i32_negone(i32 signext %a) { define i64 @smin_i64_negone(i64 %a) { ; RV32I-LABEL: smin_i64_negone: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: beq a1, a2, .LBB27_2 -; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slti a2, a1, -1 +; RV32I-NEXT: li a3, -1 ; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: beq a1, a3, .LBB27_2 +; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: or a0, a2, a0 ; RV32I-NEXT: .LBB27_2: -; RV32I-NEXT: slti a2, a1, 0 -; RV32I-NEXT: addi a2, a2, -1 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: smin_i64_negone: ; RV64I: # %bb.0: -; RV64I-NEXT: slti a1, a0, 0 +; RV64I-NEXT: slti a1, a0, -1 ; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 44767c654161..936e4be02108 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -497,10 +497,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a1, 0(a0) ; RV64I-NEXT: .LBB9_2: -; RV64I-NEXT: addi a0, a1, 1 -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: seqz a0, s0 +; RV64I-NEXT: addi a1, a1, 1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll index 3104161937b1..f53afb3d8f00 100644 --- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll @@ -219,8 +219,7 @@ define i8 @sel_shift_bool_i8(i1 %t) { ; CHECK-LABEL: sel_shift_bool_i8: ; CHECK: # %bb.0: ; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: andi a0, a0, -128 ; CHECK-NEXT: ret %shl = select i1 %t, i8 128, i8 0 diff --git a/llvm/test/CodeGen/RISCV/uadd_sat.ll b/llvm/test/CodeGen/RISCV/uadd_sat.ll index 82662321a51e..dbcb68eb3c9e 100644 --- a/llvm/test/CodeGen/RISCV/uadd_sat.ll +++ b/llvm/test/CodeGen/RISCV/uadd_sat.ll @@ -55,8 +55,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a0, a3, a1 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: seqz a0, a0 -; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: neg a1, a0 ; RV32I-NEXT: or a0, a1, a2 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret @@ -79,8 +78,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32IZbb-NEXT: # %bb.1: ; RV32IZbb-NEXT: sltu a0, a3, a1 ; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: seqz a0, a0 -; RV32IZbb-NEXT: addi a1, a0, -1 +; RV32IZbb-NEXT: neg a1, a0 ; RV32IZbb-NEXT: or a0, a1, a2 ; RV32IZbb-NEXT: or a1, a1, a3 ; RV32IZbb-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll index 3707f8286ae5..f42672efa9ba 100644 --- a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll @@ -62,8 +62,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a0, a3, a1 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: seqz a0, a0 -; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: neg a1, a0 ; RV32I-NEXT: or a0, a1, a2 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret @@ -86,8 +85,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32IZbb-NEXT: # %bb.1: ; RV32IZbb-NEXT: sltu a0, a3, a1 
; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: seqz a0, a0 -; RV32IZbb-NEXT: addi a1, a0, -1 +; RV32IZbb-NEXT: neg a1, a0 ; RV32IZbb-NEXT: or a0, a1, a2 ; RV32IZbb-NEXT: or a1, a1, a3 ; RV32IZbb-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/usub_sat.ll b/llvm/test/CodeGen/RISCV/usub_sat.ll index 8cbcd6fcfd90..aab562657642 100644 --- a/llvm/test/CodeGen/RISCV/usub_sat.ll +++ b/llvm/test/CodeGen/RISCV/usub_sat.ll @@ -56,7 +56,6 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: .LBB1_2: ; RV32I-NEXT: sltu a0, a0, a2 ; RV32I-NEXT: .LBB1_3: -; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: and a0, a1, a2 ; RV32I-NEXT: and a1, a1, a3 @@ -83,7 +82,6 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32IZbb-NEXT: .LBB1_2: ; RV32IZbb-NEXT: sltu a0, a0, a2 ; RV32IZbb-NEXT: .LBB1_3: -; RV32IZbb-NEXT: snez a0, a0 ; RV32IZbb-NEXT: addi a1, a0, -1 ; RV32IZbb-NEXT: and a0, a1, a2 ; RV32IZbb-NEXT: and a1, a1, a3 diff --git a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll index a35c04604e05..6f868b328b7c 100644 --- a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll @@ -63,7 +63,6 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32I-NEXT: .LBB1_2: ; RV32I-NEXT: sltu a0, a0, a3 ; RV32I-NEXT: .LBB1_3: -; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: and a0, a1, a3 ; RV32I-NEXT: and a1, a1, a2 @@ -90,7 +89,6 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32IZbb-NEXT: .LBB1_2: ; RV32IZbb-NEXT: sltu a0, a0, a3 ; RV32IZbb-NEXT: .LBB1_3: -; RV32IZbb-NEXT: snez a0, a0 ; RV32IZbb-NEXT: addi a1, a0, -1 ; RV32IZbb-NEXT: and a0, a1, a3 ; RV32IZbb-NEXT: and a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll index dbd0b0359f76..a0c63ded7276 100644 --- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll @@ -13,24 +13,24 @@ define void @vec3_setcc_crash(<3 x i8>* %in, <3 x i8>* %out) { ; RV32: # %bb.0: ; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: srli a2, a0, 16 -; RV32-NEXT: slli a3, a0, 8 -; RV32-NEXT: srai a3, a3, 24 -; RV32-NEXT: slli a4, a0, 24 +; RV32-NEXT: srli a3, a0, 8 +; RV32-NEXT: slli a4, a0, 16 ; RV32-NEXT: srai a4, a4, 24 -; RV32-NEXT: srli a5, a0, 8 -; RV32-NEXT: slli a6, a0, 16 +; RV32-NEXT: slli a5, a0, 24 +; RV32-NEXT: srai a5, a5, 24 +; RV32-NEXT: slli a6, a0, 8 ; RV32-NEXT: srai a6, a6, 24 ; RV32-NEXT: sgtz a6, a6 -; RV32-NEXT: neg a6, a6 -; RV32-NEXT: and a5, a6, a5 -; RV32-NEXT: slli a5, a5, 8 +; RV32-NEXT: sgtz a5, a5 ; RV32-NEXT: sgtz a4, a4 ; RV32-NEXT: neg a4, a4 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: slli a3, a3, 8 +; RV32-NEXT: neg a4, a5 ; RV32-NEXT: and a0, a4, a0 ; RV32-NEXT: andi a0, a0, 255 -; RV32-NEXT: or a0, a0, a5 -; RV32-NEXT: sgtz a3, a3 -; RV32-NEXT: neg a3, a3 +; RV32-NEXT: or a0, a0, a3 +; RV32-NEXT: neg a3, a6 ; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: sb a2, 2(a1) ; RV32-NEXT: sh a0, 0(a1) @@ -40,24 +40,24 @@ define void @vec3_setcc_crash(<3 x i8>* %in, <3 x i8>* %out) { ; RV64: # %bb.0: ; RV64-NEXT: lw a0, 0(a0) ; RV64-NEXT: srli a2, a0, 16 -; RV64-NEXT: slli a3, a0, 40 -; RV64-NEXT: srai a3, a3, 56 -; RV64-NEXT: slli a4, a0, 56 +; RV64-NEXT: srli a3, a0, 8 +; RV64-NEXT: slli a4, a0, 48 ; RV64-NEXT: srai a4, a4, 56 -; RV64-NEXT: srli a5, a0, 8 -; RV64-NEXT: slli a6, a0, 48 +; RV64-NEXT: slli a5, a0, 56 +; RV64-NEXT: srai a5, a5, 56 +; RV64-NEXT: slli a6, a0, 40 ; RV64-NEXT: srai a6, a6, 56 ; RV64-NEXT: sgtz a6, a6 -; 
RV64-NEXT: neg a6, a6 -; RV64-NEXT: and a5, a6, a5 -; RV64-NEXT: slli a5, a5, 8 +; RV64-NEXT: sgtz a5, a5 ; RV64-NEXT: sgtz a4, a4 ; RV64-NEXT: neg a4, a4 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: slli a3, a3, 8 +; RV64-NEXT: neg a4, a5 ; RV64-NEXT: and a0, a4, a0 ; RV64-NEXT: andi a0, a0, 255 -; RV64-NEXT: or a0, a0, a5 -; RV64-NEXT: sgtz a3, a3 -; RV64-NEXT: neg a3, a3 +; RV64-NEXT: or a0, a0, a3 +; RV64-NEXT: neg a3, a6 ; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: sb a2, 2(a1) ; RV64-NEXT: sh a0, 0(a1)