diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5afae3265f6c..a49f685f8fa4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -542,35 +542,38 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     return;
   }
   case ISD::SRL: {
-    // Optimize (srl (and X, 0xffff), C) ->
-    //          (srli (slli X, (XLen-16), (XLen-16) + C)
-    // Taking into account that the 0xffff may have had lower bits unset by
-    // SimplifyDemandedBits. This avoids materializing the 0xffff immediate.
-    // This pattern occurs when type legalizing i16 right shifts.
-    // FIXME: This could be extended to other AND masks.
+    // Optimize (srl (and X, C2), C) ->
+    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
+    // Where C2 is a mask with C3 trailing ones.
+    // Taking into account that the C2 may have had lower bits unset by
+    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
+    // This pattern occurs when type legalizing right shifts for types with
+    // less than XLen bits.
     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
-    if (N1C) {
-      uint64_t ShAmt = N1C->getZExtValue();
-      SDValue N0 = Node->getOperand(0);
-      if (ShAmt < 16 && N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
-          isa<ConstantSDNode>(N0.getOperand(1))) {
-        uint64_t Mask = N0.getConstantOperandVal(1);
-        Mask |= maskTrailingOnes<uint64_t>(ShAmt);
-        if (Mask == 0xffff) {
-          unsigned LShAmt = Subtarget->getXLen() - 16;
-          SDNode *SLLI =
-              CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
-                                     CurDAG->getTargetConstant(LShAmt, DL, VT));
-          SDNode *SRLI = CurDAG->getMachineNode(
-              RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
-              CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
-          ReplaceNode(Node, SRLI);
-          return;
-        }
-      }
-    }
-
-    break;
+    if (!N1C)
+      break;
+    SDValue N0 = Node->getOperand(0);
+    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+        !isa<ConstantSDNode>(N0.getOperand(1)))
+      break;
+    unsigned ShAmt = N1C->getZExtValue();
+    uint64_t Mask = N0.getConstantOperandVal(1);
+    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
+    if (!isMask_64(Mask))
+      break;
+    unsigned TrailingOnes = countTrailingOnes(Mask);
+    // 32 trailing ones should use srliw via tablegen pattern.
+    if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
+      break;
+    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
+    SDNode *SLLI =
+        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
+                               CurDAG->getTargetConstant(LShAmt, DL, VT));
+    SDNode *SRLI = CurDAG->getMachineNode(
+        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
+    ReplaceNode(Node, SRLI);
+    return;
   }
   case ISD::SRA: {
     // Optimize (sra (sext_inreg X, i16), C) ->
@@ -587,7 +590,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     SDValue N0 = Node->getOperand(0);
     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
       break;
-    uint64_t ShAmt = N1C->getZExtValue();
+    unsigned ShAmt = N1C->getZExtValue();
     unsigned ExtSize =
         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
     // ExtSize of 32 should use sraiw via tablegen pattern.
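For reference, the shift-amount arithmetic performed by the new ISD::SRL handling above can be sanity-checked with a small standalone C++ sketch. This is not part of the patch and not LLVM code; the helper name computeSlliSrli and the values in main() are purely illustrative.

// Standalone sketch (assumption: not LLVM code) of the slli/srli amounts
// computed by the generalized (srl (and X, C2), C) selection above.
#include <cstdint>
#include <cstdio>

// Returns true if (srl (and X, Mask), ShAmt) can be selected as
// (srli (slli X, XLen - TrailingOnes), XLen - TrailingOnes + ShAmt).
static bool computeSlliSrli(unsigned XLen, uint64_t Mask, unsigned ShAmt,
                            unsigned &SlliAmt, unsigned &SrliAmt) {
  // Treat bits already cleared by the shift as part of the mask, mirroring
  // the Mask |= maskTrailingOnes<uint64_t>(ShAmt) step in the patch.
  Mask |= (ShAmt >= 64) ? ~0ULL : ((1ULL << ShAmt) - 1);
  // The combined mask must be a run of trailing ones (isMask_64).
  if (Mask == 0 || (Mask & (Mask + 1)) != 0)
    return false;
  // All set bits are trailing because Mask is a mask, so popcount works here.
  unsigned TrailingOnes = __builtin_popcountll(Mask);
  // 32 trailing ones on RV64 is left to the srliw pattern, and the shift must
  // not consume the entire masked field.
  if (TrailingOnes == 32 || ShAmt >= TrailingOnes || TrailingOnes > XLen)
    return false;
  SlliAmt = XLen - TrailingOnes;
  SrliAmt = SlliAmt + ShAmt;
  return true;
}

int main() {
  unsigned Slli, Srli;
  // (lshr i8 %a, 6) on RV64: SimplifyDemandedBits leaves mask 0xc0, ShAmt 6,
  // giving slli 56 / srli 62, matching the updated srli_i8 checks below.
  if (computeSlliSrli(64, 0xc0, 6, Slli, Srli))
    printf("slli %u, srli %u\n", Slli, Srli);
  return 0;
}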
diff --git a/llvm/test/CodeGen/RISCV/alu8.ll b/llvm/test/CodeGen/RISCV/alu8.ll
index 8611e752028d..dafa328450d9 100644
--- a/llvm/test/CodeGen/RISCV/alu8.ll
+++ b/llvm/test/CodeGen/RISCV/alu8.ll
@@ -135,14 +135,14 @@ define i8 @slli(i8 %a) nounwind {
 define i8 @srli(i8 %a) nounwind {
 ; RV32I-LABEL: srli:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 192
-; RV32I-NEXT: srli a0, a0, 6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 30
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: srli:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 192
-; RV64I-NEXT: srli a0, a0, 6
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 62
 ; RV64I-NEXT: ret
   %1 = lshr i8 %a, 6
   ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index e7be4070fe02..74c2357fe700 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -212,10 +212,8 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV32I-NEXT: and a0, a0, a1
 ; RV32I-NEXT: slli a1, a0, 8
 ; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: addi a1, a1, -256
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: slli a0, a0, 19
+; RV32I-NEXT: srli a0, a0, 27
 ; RV32I-NEXT: ret
 ; RV32I-NEXT: .LBB4_2:
 ; RV32I-NEXT: li a0, 16
@@ -247,12 +245,10 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64I-NEXT: lui a1, 1
 ; RV64I-NEXT: addiw a1, a1, -241
 ; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: slli a1, a0, 8
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: addiw a1, a1, -256
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: slliw a1, a0, 8
+; RV64I-NEXT: addw a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 51
+; RV64I-NEXT: srli a0, a0, 59
 ; RV64I-NEXT: ret
 ; RV64I-NEXT: .LBB4_2:
 ; RV64I-NEXT: li a0, 16
@@ -605,10 +601,8 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV32I-NEXT: and a0, a0, a1
 ; RV32I-NEXT: slli a1, a0, 8
 ; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: addi a1, a1, -256
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: slli a0, a0, 19
+; RV32I-NEXT: srli a0, a0, 27
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: test_cttz_i16_zero_undef:
@@ -632,12 +626,10 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV64I-NEXT: lui a1, 1
 ; RV64I-NEXT: addiw a1, a1, -241
 ; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: slli a1, a0, 8
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: addiw a1, a1, -256
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: slliw a1, a0, 8
+; RV64I-NEXT: addw a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 51
+; RV64I-NEXT: srli a0, a0, 59
 ; RV64I-NEXT: ret
   %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true)
   ret i16 %tmp
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 2f13b18d0ac3..f20ff9b6d4f9 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -163,8 +163,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV32IM-NEXT: mul a1, a1, a2
 ; RV32IM-NEXT: srli a1, a1, 8
 ; RV32IM-NEXT: sub a0, a0, a1
-; RV32IM-NEXT: andi a0, a0, 254
-; RV32IM-NEXT: srli a0, a0, 1
+; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a0, a0, 25
 ; RV32IM-NEXT: add a0, a0, a1
 ; RV32IM-NEXT: srli a0, a0, 2
 ; RV32IM-NEXT: ret
@@ -176,8 +176,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV32IMZB-NEXT: sh2add a1, a2, a1
 ; RV32IMZB-NEXT: srli a1, a1, 8
 ; RV32IMZB-NEXT: sub a0, a0, a1
-; RV32IMZB-NEXT: andi a0, a0, 254
-; RV32IMZB-NEXT: srli a0, a0, 1
+; RV32IMZB-NEXT: slli a0, a0, 24
+; RV32IMZB-NEXT: srli a0, a0, 25
 ; RV32IMZB-NEXT: add a0, a0, a1
 ; RV32IMZB-NEXT: srli a0, a0, 2
 ; RV32IMZB-NEXT: ret
@@ -189,8 +189,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV64IM-NEXT: mul a1, a1, a2
 ; RV64IM-NEXT: srli a1, a1, 8
 ; RV64IM-NEXT: subw a0, a0, a1
-; RV64IM-NEXT: andi a0, a0, 254
-; RV64IM-NEXT: srli a0, a0, 1
+; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a0, a0, 57
 ; RV64IM-NEXT: add a0, a0, a1
 ; RV64IM-NEXT: srli a0, a0, 2
 ; RV64IM-NEXT: ret
@@ -202,8 +202,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV64IMZB-NEXT: sh2add a1, a2, a1
 ; RV64IMZB-NEXT: srli a1, a1, 8
 ; RV64IMZB-NEXT: subw a0, a0, a1
-; RV64IMZB-NEXT: andi a0, a0, 254
-; RV64IMZB-NEXT: srli a0, a0, 1
+; RV64IMZB-NEXT: slli a0, a0, 56
+; RV64IMZB-NEXT: srli a0, a0, 57
 ; RV64IMZB-NEXT: add a0, a0, a1
 ; RV64IMZB-NEXT: srli a0, a0, 2
 ; RV64IMZB-NEXT: ret
@@ -618,8 +618,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
   ret i8 %1
 }
 
-; FIXME: Can shorten the code after the mul by using slli+srai/srli like the
-; i16 version without Zbb.
 define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV32IM-LABEL: sdiv8_constant_add_srai:
 ; RV32IM: # %bb.0:
@@ -629,9 +627,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV32IM-NEXT: mul a1, a1, a2
 ; RV32IM-NEXT: srli a1, a1, 8
 ; RV32IM-NEXT: add a0, a1, a0
-; RV32IM-NEXT: andi a1, a0, 128
-; RV32IM-NEXT: srli a1, a1, 7
 ; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a1, a0, 31
 ; RV32IM-NEXT: srai a0, a0, 26
 ; RV32IM-NEXT: add a0, a0, a1
 ; RV32IM-NEXT: ret
@@ -643,9 +640,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV32IMZB-NEXT: mul a1, a1, a2
 ; RV32IMZB-NEXT: srli a1, a1, 8
 ; RV32IMZB-NEXT: add a0, a1, a0
-; RV32IMZB-NEXT: andi a1, a0, 128
-; RV32IMZB-NEXT: srli a1, a1, 7
 ; RV32IMZB-NEXT: slli a0, a0, 24
+; RV32IMZB-NEXT: srli a1, a0, 31
 ; RV32IMZB-NEXT: srai a0, a0, 26
 ; RV32IMZB-NEXT: add a0, a0, a1
 ; RV32IMZB-NEXT: ret
@@ -658,9 +654,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV64IM-NEXT: mul a1, a1, a2
 ; RV64IM-NEXT: srli a1, a1, 8
 ; RV64IM-NEXT: addw a0, a1, a0
-; RV64IM-NEXT: andi a1, a0, 128
-; RV64IM-NEXT: srli a1, a1, 7
 ; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a1, a0, 63
 ; RV64IM-NEXT: srai a0, a0, 58
 ; RV64IM-NEXT: add a0, a0, a1
 ; RV64IM-NEXT: ret
@@ -672,9 +667,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV64IMZB-NEXT: mul a1, a1, a2
 ; RV64IMZB-NEXT: srli a1, a1, 8
 ; RV64IMZB-NEXT: addw a0, a1, a0
-; RV64IMZB-NEXT: andi a1, a0, 128
-; RV64IMZB-NEXT: srli a1, a1, 7
 ; RV64IMZB-NEXT: slli a0, a0, 56
+; RV64IMZB-NEXT: srli a1, a0, 63
 ; RV64IMZB-NEXT: srai a0, a0, 58
 ; RV64IMZB-NEXT: add a0, a0, a1
 ; RV64IMZB-NEXT: ret
@@ -682,8 +676,6 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
   ret i8 %1
 }
 
-; FIXME: Can shorten the code after the mul by using slli+srai/srli like the
-; i16 version without Zbb.
 define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV32IM-LABEL: sdiv8_constant_sub_srai:
 ; RV32IM: # %bb.0:
@@ -693,9 +685,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV32IM-NEXT: mul a1, a1, a2
 ; RV32IM-NEXT: srli a1, a1, 8
 ; RV32IM-NEXT: sub a0, a1, a0
-; RV32IM-NEXT: andi a1, a0, 128
-; RV32IM-NEXT: srli a1, a1, 7
 ; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a1, a0, 31
 ; RV32IM-NEXT: srai a0, a0, 26
 ; RV32IM-NEXT: add a0, a0, a1
 ; RV32IM-NEXT: ret
@@ -707,9 +698,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV32IMZB-NEXT: mul a1, a1, a2
 ; RV32IMZB-NEXT: srli a1, a1, 8
 ; RV32IMZB-NEXT: sub a0, a1, a0
-; RV32IMZB-NEXT: andi a1, a0, 128
-; RV32IMZB-NEXT: srli a1, a1, 7
 ; RV32IMZB-NEXT: slli a0, a0, 24
+; RV32IMZB-NEXT: srli a1, a0, 31
 ; RV32IMZB-NEXT: srai a0, a0, 26
 ; RV32IMZB-NEXT: add a0, a0, a1
 ; RV32IMZB-NEXT: ret
@@ -722,9 +712,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV64IM-NEXT: mul a1, a1, a2
 ; RV64IM-NEXT: srli a1, a1, 8
 ; RV64IM-NEXT: subw a0, a1, a0
-; RV64IM-NEXT: andi a1, a0, 128
-; RV64IM-NEXT: srli a1, a1, 7
 ; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a1, a0, 63
 ; RV64IM-NEXT: srai a0, a0, 58
 ; RV64IM-NEXT: add a0, a0, a1
 ; RV64IM-NEXT: ret
@@ -736,9 +725,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV64IMZB-NEXT: mul a1, a1, a2
 ; RV64IMZB-NEXT: srli a1, a1, 8
 ; RV64IMZB-NEXT: subw a0, a1, a0
-; RV64IMZB-NEXT: andi a1, a0, 128
-; RV64IMZB-NEXT: srli a1, a1, 7
 ; RV64IMZB-NEXT: slli a0, a0, 56
+; RV64IMZB-NEXT: srli a1, a0, 63
 ; RV64IMZB-NEXT: srai a0, a0, 58
 ; RV64IMZB-NEXT: add a0, a0, a1
 ; RV64IMZB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index af6855e94ff6..3d4db5bbeb69 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -363,26 +363,26 @@ define i8 @udiv8_constant(i8 %a) nounwind {
 define i8 @udiv8_pow2(i8 %a) nounwind {
 ; RV32I-LABEL: udiv8_pow2:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 248
-; RV32I-NEXT: srli a0, a0, 3
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 27
 ; RV32I-NEXT: ret
 ;
 ; RV32IM-LABEL: udiv8_pow2:
 ; RV32IM: # %bb.0:
-; RV32IM-NEXT: andi a0, a0, 248
-; RV32IM-NEXT: srli a0, a0, 3
+; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a0, a0, 27
 ; RV32IM-NEXT: ret
 ;
 ; RV64I-LABEL: udiv8_pow2:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 248
-; RV64I-NEXT: srli a0, a0, 3
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 59
 ; RV64I-NEXT: ret
 ;
 ; RV64IM-LABEL: udiv8_pow2:
 ; RV64IM: # %bb.0:
-; RV64IM-NEXT: andi a0, a0, 248
-; RV64IM-NEXT: srli a0, a0, 3
+; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a0, a0, 59
 ; RV64IM-NEXT: ret
   %1 = udiv i8 %a, 8
   ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
index a4fda68ba021..f9cd53bdf965 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -473,20 +473,20 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
 define i8 @srli_i8(i8 %a) nounwind {
 ; RV32I-LABEL: srli_i8:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 192
-; RV32I-NEXT: srli a0, a0, 6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 30
 ; RV32I-NEXT: ret
 ;
 ; RV32ZBB-LABEL: srli_i8:
 ; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: andi a0, a0, 192
-; RV32ZBB-NEXT: srli a0, a0, 6
+; RV32ZBB-NEXT: slli a0, a0, 24
+; RV32ZBB-NEXT: srli a0, a0, 30
 ; RV32ZBB-NEXT: ret
 ;
 ; RV32ZBP-LABEL: srli_i8:
 ; RV32ZBP: # %bb.0:
-; RV32ZBP-NEXT: andi a0, a0, 192
-; RV32ZBP-NEXT: srli a0, a0, 6
+; RV32ZBP-NEXT: slli a0, a0, 24
+; RV32ZBP-NEXT: srli a0, a0, 30
 ; RV32ZBP-NEXT: ret
   %1 = lshr i8 %a, 6
   ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
index 79a91979f2bd..97093ea0a052 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
@@ -544,20 +544,20 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
 define i8 @srli_i8(i8 %a) nounwind {
 ; RV64I-LABEL: srli_i8:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 192
-; RV64I-NEXT: srli a0, a0, 6
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 62
 ; RV64I-NEXT: ret
 ;
 ; RV64ZBB-LABEL: srli_i8:
 ; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: andi a0, a0, 192
-; RV64ZBB-NEXT: srli a0, a0, 6
+; RV64ZBB-NEXT: slli a0, a0, 56
+; RV64ZBB-NEXT: srli a0, a0, 62
 ; RV64ZBB-NEXT: ret
 ;
 ; RV64ZBP-LABEL: srli_i8:
 ; RV64ZBP: # %bb.0:
-; RV64ZBP-NEXT: andi a0, a0, 192
-; RV64ZBP-NEXT: srli a0, a0, 6
+; RV64ZBP-NEXT: slli a0, a0, 56
+; RV64ZBP-NEXT: srli a0, a0, 62
 ; RV64ZBP-NEXT: ret
   %1 = lshr i8 %a, 6
   ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 0d892e650814..952d4c794275 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -264,9 +264,8 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: srli a1, a0, 8
 ; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: lui a1, 524272
-; RV64I-NEXT: and a1, a0, a1
-; RV64I-NEXT: srli a1, a1, 16
+; RV64I-NEXT: slli a1, a0, 33
+; RV64I-NEXT: srli a1, a1, 49
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: not a0, a0
 ; RV64I-NEXT: srli a1, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 24a7c78d2666..810fee3464a4 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -380,14 +380,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ;
 ; RV64-LABEL: test_srem_vec:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -64
-; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: addi sp, sp, -48
+; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT: mv s0, a0
 ; RV64-NEXT: lb a0, 12(a0)
 ; RV64-NEXT: lwu a1, 8(s0)
@@ -407,7 +406,6 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64-NEXT: slli a1, a1, 31
 ; RV64-NEXT: srai s2, a1, 31
 ; RV64-NEXT: li a1, 7
-; RV64-NEXT: li s5, 7
 ; RV64-NEXT: call __moddi3@plt
 ; RV64-NEXT: mv s3, a0
 ; RV64-NEXT: li a1, -5
@@ -432,9 +430,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64-NEXT: neg a0, a0
 ; RV64-NEXT: neg a2, a2
 ; RV64-NEXT: neg a3, a1
-; RV64-NEXT: slli a4, s5, 32
-; RV64-NEXT: and a3, a3, a4
-; RV64-NEXT: srli a3, a3, 32
+; RV64-NEXT: slli a3, a3, 29
+; RV64-NEXT: srli a3, a3, 61
 ; RV64-NEXT: sb a3, 12(s0)
 ; RV64-NEXT: slliw a1, a1, 2
 ; RV64-NEXT: srli a3, s4, 31
@@ -446,14 +443,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64-NEXT: slli a1, a2, 33
 ; RV64-NEXT: or a0, a0, a1
 ; RV64-NEXT: sd a0, 0(s0)
-; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 48
 ; RV64-NEXT: ret
 ;
 ; RV32M-LABEL: test_srem_vec:
@@ -592,10 +588,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64M-NEXT: neg a1, a1
 ; RV64M-NEXT: neg a4, a2
 ; RV64M-NEXT: neg a3, a3
-; RV64M-NEXT: li a5, 7
-; RV64M-NEXT: slli a5, a5, 32
-; RV64M-NEXT: and a4, a4, a5
-; RV64M-NEXT: srli a4, a4, 32
+; RV64M-NEXT: slli a4, a4, 29
+; RV64M-NEXT: srli a4, a4, 61
 ; RV64M-NEXT: sb a4, 12(a0)
 ; RV64M-NEXT: slliw a2, a2, 2
 ; RV64M-NEXT: srli a4, a6, 31
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 8c3870ee4070..a7c2cdf12248 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -94,12 +94,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV32-NEXT: addi a1, a1, -585
 ; RV32-NEXT: call __mulsi3@plt
 ; RV32-NEXT: slli a1, a0, 26
-; RV32-NEXT: lui a2, 32768
-; RV32-NEXT: addi a3, a2, -2
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: srli a0, a0, 6
 ; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: addi a1, a2, -1
+; RV32-NEXT: lui a1, 32768
+; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: lui a1, 2341
 ; RV32-NEXT: addi a1, a1, -1755
@@ -116,12 +115,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64-NEXT: addiw a1, a1, -585
 ; RV64-NEXT: call __muldi3@plt
 ; RV64-NEXT: slli a1, a0, 26
-; RV64-NEXT: lui a2, 32768
-; RV64-NEXT: addiw a3, a2, -2
-; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: slli a0, a0, 37
+; RV64-NEXT: srli a0, a0, 38
 ; RV64-NEXT: or a0, a0, a1
-; RV64-NEXT: addiw a1, a2, -1
+; RV64-NEXT: lui a1, 32768
+; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: lui a1, 2341
 ; RV64-NEXT: addiw a1, a1, -1755
@@ -136,12 +134,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV32M-NEXT: addi a1, a1, -585
 ; RV32M-NEXT: mul a0, a0, a1
 ; RV32M-NEXT: slli a1, a0, 26
-; RV32M-NEXT: lui a2, 32768
-; RV32M-NEXT: addi a3, a2, -2
-; RV32M-NEXT: and a0, a0, a3
-; RV32M-NEXT: srli a0, a0, 1
+; RV32M-NEXT: slli a0, a0, 5
+; RV32M-NEXT: srli a0, a0, 6
 ; RV32M-NEXT: or a0, a0, a1
-; RV32M-NEXT: addi a1, a2, -1
+; RV32M-NEXT: lui a1, 32768
+; RV32M-NEXT: addi a1, a1, -1
 ; RV32M-NEXT: and a0, a0, a1
 ; RV32M-NEXT: lui a1, 2341
 ; RV32M-NEXT: addi a1, a1, -1755
@@ -154,12 +151,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64M-NEXT: addiw a1, a1, -585
 ; RV64M-NEXT: mul a0, a0, a1
 ; RV64M-NEXT: slli a1, a0, 26
-; RV64M-NEXT: lui a2, 32768
-; RV64M-NEXT: addiw a3, a2, -2
-; RV64M-NEXT: and a0, a0, a3
-; RV64M-NEXT: srli a0, a0, 1
+; RV64M-NEXT: slli a0, a0, 37
+; RV64M-NEXT: srli a0, a0, 38
 ; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: addiw a1, a2, -1
+; RV64M-NEXT: lui a1, 32768
+; RV64M-NEXT: addiw a1, a1, -1
 ; RV64M-NEXT: and a0, a0, a1
 ; RV64M-NEXT: lui a1, 2341
 ; RV64M-NEXT: addiw a1, a1, -1755
@@ -172,12 +168,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV32MV-NEXT: addi a1, a1, -585
 ; RV32MV-NEXT: mul a0, a0, a1
 ; RV32MV-NEXT: slli a1, a0, 26
-; RV32MV-NEXT: lui a2, 32768
-; RV32MV-NEXT: addi a3, a2, -2
-; RV32MV-NEXT: and a0, a0, a3
-; RV32MV-NEXT: srli a0, a0, 1
+; RV32MV-NEXT: slli a0, a0, 5
+; RV32MV-NEXT: srli a0, a0, 6
 ; RV32MV-NEXT: or a0, a0, a1
-; RV32MV-NEXT: addi a1, a2, -1
+; RV32MV-NEXT: lui a1, 32768
+; RV32MV-NEXT: addi a1, a1, -1
 ; RV32MV-NEXT: and a0, a0, a1
 ; RV32MV-NEXT: lui a1, 2341
 ; RV32MV-NEXT: addi a1, a1, -1755
@@ -190,12 +185,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64MV-NEXT: addiw a1, a1, -585
 ; RV64MV-NEXT: mul a0, a0, a1
 ; RV64MV-NEXT: slli a1, a0, 26
-; RV64MV-NEXT: lui a2, 32768
-; RV64MV-NEXT: addiw a3, a2, -2
-; RV64MV-NEXT: and a0, a0, a3
-; RV64MV-NEXT: srli a0, a0, 1
+; RV64MV-NEXT: slli a0, a0, 37
+; RV64MV-NEXT: srli a0, a0, 38
 ; RV64MV-NEXT: or a0, a0, a1
-; RV64MV-NEXT: addiw a1, a2, -1
+; RV64MV-NEXT: lui a1, 32768
+; RV64MV-NEXT: addiw a1, a1, -1
 ; RV64MV-NEXT: and a0, a0, a1
 ; RV64MV-NEXT: lui a1, 2341
 ; RV64MV-NEXT: addiw a1, a1, -1755
@@ -358,8 +352,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV32-NEXT: li a1, 683
 ; RV32-NEXT: call __mulsi3@plt
 ; RV32-NEXT: slli a1, a0, 10
-; RV32-NEXT: andi a0, a0, 2046
-; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: slli a0, a0, 21
+; RV32-NEXT: srli a0, a0, 22
 ; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: andi a0, a0, 2047
 ; RV32-NEXT: li a1, 341
@@ -418,8 +412,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT: li a1, 683
 ; RV64-NEXT: call __muldi3@plt
 ; RV64-NEXT: slli a1, a0, 10
-; RV64-NEXT: andi a0, a0, 2046
-; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: slli a0, a0, 53
+; RV64-NEXT: srli a0, a0, 54
 ; RV64-NEXT: or a0, a0, a1
 ; RV64-NEXT: andi a0, a0, 2047
 ; RV64-NEXT: li a1, 341
@@ -447,10 +441,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT: slli a1, s1, 22
 ; RV64-NEXT: sub a0, a0, a1
 ; RV64-NEXT: sw a0, 0(s0)
-; RV64-NEXT: li a1, -1
-; RV64-NEXT: srli a1, a1, 31
-; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: slli a0, a0, 31
+; RV64-NEXT: srli a0, a0, 63
 ; RV64-NEXT: sb a0, 4(s0)
 ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -472,8 +464,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV32M-NEXT: li a4, 683
 ; RV32M-NEXT: mul a2, a2, a4
 ; RV32M-NEXT: slli a4, a2, 10
-; RV32M-NEXT: andi a2, a2, 2046
-; RV32M-NEXT: srli a2, a2, 1
+; RV32M-NEXT: slli a2, a2, 21
+; RV32M-NEXT: srli a2, a2, 22
 ; RV32M-NEXT: or a2, a2, a4
 ; RV32M-NEXT: andi a2, a2, 2047
 ; RV32M-NEXT: li a4, 341
@@ -517,8 +509,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64M-NEXT: li a4, 683
 ; RV64M-NEXT: mul a1, a1, a4
 ; RV64M-NEXT: slli a4, a1, 10
-; RV64M-NEXT: andi a1, a1, 2046
-; RV64M-NEXT: srli a1, a1, 1
+; RV64M-NEXT: slli a1, a1, 53
+; RV64M-NEXT: srli a1, a1, 54
 ; RV64M-NEXT: or a1, a1, a4
 ; RV64M-NEXT: andi a1, a1, 2047
 ; RV64M-NEXT: li a4, 341
@@ -544,10 +536,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64M-NEXT: slli a2, a3, 22
 ; RV64M-NEXT: sub a1, a1, a2
 ; RV64M-NEXT: sw a1, 0(a0)
-; RV64M-NEXT: li a2, -1
-; RV64M-NEXT: srli a2, a2, 31
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: srli a1, a1, 32
+; RV64M-NEXT: slli a1, a1, 31
+; RV64M-NEXT: srli a1, a1, 63
 ; RV64M-NEXT: sb a1, 4(a0)
 ; RV64M-NEXT: ret
 ;
@@ -676,10 +666,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64MV-NEXT: slli a2, a2, 22
 ; RV64MV-NEXT: or a1, a1, a2
 ; RV64MV-NEXT: sw a1, 0(a0)
-; RV64MV-NEXT: li a2, -1
-; RV64MV-NEXT: srli a2, a2, 31
-; RV64MV-NEXT: and a1, a1, a2
-; RV64MV-NEXT: srli a1, a1, 32
+; RV64MV-NEXT: slli a1, a1, 31
+; RV64MV-NEXT: srli a1, a1, 63
 ; RV64MV-NEXT: sb a1, 4(a0)
 ; RV64MV-NEXT: addi sp, sp, 16
 ; RV64MV-NEXT: ret