[RISCV] Improve codegen for i32 udiv/urem by constant on RV64.

The division-by-constant optimization often produces constants that are
uimm32 but not simm32. These constants require 3 or 4 instructions to
materialize without Zba. Since these constants are often used by a
multiply whose LHS needs to be zero extended with an AND, we can switch
the MUL to a MULHU by shifting both inputs left by 32. Once we shift the
constant left, its upper 32 bits no longer need to be 0, so constant
materialization is free to use LUI+ADDIW. This reduces the constant
materialization from 4 instructions to 3 in some cases, while also
reducing the zero extension of the LHS from 2 shifts to 1.

Differential Revision: https://reviews.llvm.org/D113805
parent 9a2b54af22
commit 02bed66cd5
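The whole change rests on one identity: for 32-bit values a and b, the
128-bit product (a << 32) * (b << 32) equals (a * b) << 64, so the upper
64 bits that MULHU returns are exactly the full 32x32 product a * b. A
minimal standalone check in C++ (our own sketch, not part of the patch;
mulhu models the RISC-V instruction via GCC/Clang's unsigned __int128):

#include <cassert>
#include <cstdint>

// Upper 64 bits of a 64x64->128 unsigned multiply, i.e. RISC-V MULHU.
static uint64_t mulhu(uint64_t a, uint64_t b) {
  return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main() {
  const uint64_t vals[] = {0, 1, 5, 0xCCCCCCCD, 0xDEADBEEF, 0xFFFFFFFF};
  for (uint64_t a : vals)
    for (uint64_t b : vals)
      // (a << 32) * (b << 32) == (a * b) << 64 as a 128-bit product,
      // so MULHU of the pre-shifted operands is the full 32x32 product.
      assert(mulhu(a << 32, b << 32) == a * b);
  return 0;
}

Because both operands are pre-shifted, neither needs its upper 32 bits
cleared beforehand, which is what lets both the AND of the LHS (two
shifts without Zba) and the constant materialization shrink.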
@@ -96,13 +96,24 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
           (REMW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtM, IsRV64]

// Pattern to detect constants with no more than 32 active bits that can't
// be materialized with lui+addiw.
def uimm32_not_simm32 : PatLeaf<(XLenVT GPR:$a), [{
  auto *C = dyn_cast<ConstantSDNode>(N);
  return C && C->hasOneUse() && isUInt<32>(C->getZExtValue()) &&
         !isInt<32>(C->getSExtValue());
}]>;

let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
// Special case for calculating the full 64-bit product of a 32x32 unsigned
// multiply where the inputs aren't known to be zero extended. We can shift the
// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
// zeroing the upper 32 bits.
// TODO: If one of the operands is zero extended and the other isn't, we might
// still be better off shifting both left by 32.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
          (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
// The RHS could also be a constant that is hard to materialize. By shifting
// left we can allow constant materialization to use LUI+ADDIW via
// hasAllWUsers.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), uimm32_not_simm32:$rs2)),
          (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba]
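As a standalone illustration of what the new PatLeaf accepts, the
predicate can be paraphrased in plain C++ (our sketch; the real
isUInt<32>/isInt<32> templates live in llvm/Support/MathExtras.h). A
value qualifies when it fits in 32 unsigned bits but not in 32
sign-extended bits, i.e. exactly the constants LUI+ADDIW alone cannot
build:

#include <cassert>
#include <cstdint>

// Mirrors the PatLeaf check: fits in 32 unsigned bits (uimm32) but not
// in 32 signed bits (simm32). (The TableGen version additionally
// requires the constant to have a single use.)
static bool isUImm32NotSImm32(int64_t v) {
  bool uimm32 = (uint64_t)v <= 0xFFFFFFFFu;        // isUInt<32>
  bool simm32 = v >= INT32_MIN && v <= INT32_MAX;  // isInt<32>
  return uimm32 && !simm32;
}

int main() {
  assert(isUImm32NotSImm32(0xCCCCCCCD));   // udiv-by-5 magic: qualifies
  assert(isUImm32NotSImm32(0xF74E3FC3));   // udiv-by-1060 magic: qualifies
  assert(!isUImm32NotSImm32(0x7FFFFFFF));  // simm32: LUI+ADDIW already works
  assert(!isUImm32NotSImm32(-819));        // negative simm32: doesn't qualify
  return 0;
}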
@@ -78,12 +78,10 @@ define i32 @udiv_constant(i32 %a) nounwind {
 ; RV64IM-LABEL: udiv_constant:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    lui a1, 205
+; RV64IM-NEXT:    lui a1, 838861
 ; RV64IM-NEXT:    addiw a1, a1, -819
-; RV64IM-NEXT:    slli a1, a1, 12
-; RV64IM-NEXT:    addi a1, a1, -819
-; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    slli a1, a1, 32
+; RV64IM-NEXT:    mulhu a0, a0, a1
 ; RV64IM-NEXT:    srli a0, a0, 34
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, 5
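The arithmetic of the new udiv_constant body checks out in isolation:
0xCCCCCCCD is ceil(2^34 / 5), the classic unsigned divide-by-5 magic
constant, and MULHU of the two pre-shifted operands reproduces the full
32x32 product before the final shift by 34. A sketch under assumed
names (udiv5 and mulhu are ours):

#include <cassert>
#include <cstdint>

static uint64_t mulhu(uint64_t a, uint64_t b) {
  return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

// Mimics the post-patch sequence: slli 32 on the LHS, lui+addiw+slli 32
// for the shifted constant, then mulhu and srli 34.
static uint32_t udiv5(uint32_t x) {
  uint64_t lhs = (uint64_t)x << 32;          // slli a0, a0, 32
  uint64_t rhs = 0xCCCCCCCDull << 32;        // lui+addiw, then slli a1, a1, 32
  return (uint32_t)(mulhu(lhs, rhs) >> 34);  // mulhu + srli a0, a0, 34
}

int main() {
  for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 98765)  // sampled range
    assert(udiv5((uint32_t)x) == (uint32_t)x / 5);
  assert(udiv5(0xFFFFFFFFu) == 0xFFFFFFFFu / 5);        // upper edge
  return 0;
}

Counting instructions: the old body spent slli+srli on the zero extend
and lui+addiw+slli+addi on the constant; the new one spends a single
slli on the LHS and lui+addiw+slli on the shifted constant, two
instructions fewer overall.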
@@ -103,11 +103,10 @@ define i32 @fold_urem_positive_even(i32 %x) nounwind {
 ; RV64IM-LABEL: fold_urem_positive_even:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    lui a2, 253241
-; RV64IM-NEXT:    slli a2, a2, 2
-; RV64IM-NEXT:    addi a2, a2, -61
-; RV64IM-NEXT:    mul a1, a1, a2
+; RV64IM-NEXT:    lui a2, 1012964
+; RV64IM-NEXT:    addiw a2, a2, -61
+; RV64IM-NEXT:    slli a2, a2, 32
+; RV64IM-NEXT:    mulhu a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 42
 ; RV64IM-NEXT:    addi a2, zero, 1060
 ; RV64IM-NEXT:    mulw a1, a1, a2
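The same pattern drives fold_urem_positive_even: the quotient magic
constant is 0xF74E3FC3 = ceil(2^42 / 1060) with a final shift of 42,
and the remainder is then rebuilt from the quotient by the trailing
mulw plus a subtract that falls outside the shown hunk (an assumption
on our part). A hedged C++ model of the post-patch sequence:

#include <cassert>
#include <cstdint>

static uint64_t mulhu(uint64_t a, uint64_t b) {
  return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

// Models the post-patch body: q = (x * 0xF74E3FC3) >> 42 via the
// shifted MULHU trick, then r = x - q * 1060 (mulw plus the subtract
// that is not shown in the truncated hunk above).
static uint32_t urem1060(uint32_t x) {
  uint64_t q = mulhu((uint64_t)x << 32, 0xF74E3FC3ull << 32) >> 42;
  return x - (uint32_t)q * 1060u;
}

int main() {
  for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 12345)  // sampled range
    assert(urem1060((uint32_t)x) == (uint32_t)x % 1060u);
  assert(urem1060(0xFFFFFFFFu) == 0xFFFFFFFFu % 1060u);  // upper edge
  return 0;
}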