[RISCV] Add implementation of targetShrinkDemandedConstant to optimize AND immediates.

SimplifyDemandedBits can clear set bits in the immediates of instructions
like AND/OR/XOR. This can prevent those immediates from being materialized
efficiently on RISCV.

This adds an initial version that tries to keep or form 12-bit
sign-extended immediates for AND operations to enable use of ANDI.
If that doesn't work, we'll try to create a 32-bit sign-extended immediate
that can be materialized with LUI+ADDIW.

More optimizations are possible for different size immediates or
different operations. But this is a good starting point that already
has test coverage.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D94628
This commit is contained in:
Craig Topper 2021-01-15 10:54:26 -08:00
parent d0cb0d30a4
commit 86e604c4d6
10 changed files with 198 additions and 233 deletions

View File

@ -1900,6 +1900,71 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
return true;
}
// Target hook that picks a cheaper-to-materialize constant for a scalar AND.
//
// Bits of the AND mask that are not in DemandedBits may be set or cleared
// freely. This hook uses that freedom to turn the mask into a negative value
// that fits in a sign extended 12 bit immediate (usable by ANDI) or, failing
// that, a sign extended 32 bit immediate (LUI+ADDIW).
//
// Returns false to let the target independent code handle the constant, and
// true when the constant should be kept as-is or has been replaced here.
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Hold off until operations have been legalized so this runs as late as
  // possible.
  if (!TLO.LegalOps)
    return false;

  // Scalar AND is the only form handled for now.
  const EVT OpVT = Op.getValueType();
  if (OpVT.isVector() || Op.getOpcode() != ISD::AND)
    return false;

  // The right-hand operand has to be a constant mask.
  auto *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!MaskNode)
    return false;
  const APInt &OrigMask = MaskNode->getAPIntValue();

  // Smallest candidate: the mask with every non-demanded bit cleared. When
  // that already fits in a sign extended 12 bit immediate, the target
  // independent code will apply it, so there is nothing to do here.
  const APInt DemandedOnly = OrigMask & DemandedBits;
  if (DemandedOnly.isSignedIntN(12))
    return false;

  // Largest candidate: the mask with every non-demanded bit set. A negative
  // immediate can only be formed if this value is itself negative.
  const APInt AllSettable = OrigMask | ~DemandedBits;
  if (!AllSettable.isNegative())
    return false;

  // Fewest bits needed to represent the largest candidate as a signed number.
  const unsigned BitsNeeded = AllSettable.getMinSignedBits();

  // Choose where the run of sign bits starts: bit 11 for a 12 bit negative
  // immediate, otherwise bit 31 for a 32 bit negative immediate. The 32 bit
  // form is skipped when the shrunk mask already fits in 32 bits.
  unsigned FirstSignBit;
  if (BitsNeeded <= 12)
    FirstSignBit = 11;
  else if (BitsNeeded <= 32 && !DemandedOnly.isSignedIntN(32))
    FirstSignBit = 31;
  else
    return false;

  APInt Replacement = DemandedOnly;
  Replacement.setBitsFrom(FirstSignBit);

  // Every bit we set must be one that the largest candidate also has set.
  assert(Replacement.isSubsetOf(AllSettable));

  // The existing mask is already the one we want: report success so the
  // caller keeps it and does not optimize it further.
  if (Replacement == OrigMask)
    return true;

  // Rebuild the AND with the new constant and hand it to the combiner.
  SDLoc DL(Op);
  SDValue MaskConst = TLO.DAG.getConstant(Replacement, DL, OpVT);
  SDValue Rebuilt =
      TLO.DAG.getNode(ISD::AND, DL, OpVT, Op.getOperand(0), MaskConst);
  return TLO.CombineTo(Op, Rebuilt);
}
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,

View File

@ -131,6 +131,10 @@ public:
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
/// Override of the constant-shrinking hook. For a scalar AND whose second
/// operand is a constant, tries to choose a mask (using the bits outside
/// \p DemandedBits as free bits) that fits in a sign extended 12 bit or
/// 32 bit immediate. Returns true if the constant was replaced or should be
/// kept unchanged, false to leave the decision to the caller.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,

View File

@ -42,8 +42,7 @@ define double @fold_promote_d_s(double %a, float %b) nounwind {
; RV64I-NEXT: slli a2, a2, 63
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 31
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a0, a0, a1
@ -188,10 +187,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: addiw a2, a2, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 33
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 15
; RV64I-NEXT: lui a2, 1048568
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: or a0, a0, a1

View File

@ -82,10 +82,7 @@ define void @stack_alloc(i32 signext %size) {
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a0, a0, 15
; RV64-NEXT: addi a1, zero, 1
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: addi a1, a1, -16
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: andi a0, a0, -16
; RV64-NEXT: sub a0, sp, a0
; RV64-NEXT: mv sp, a0
; RV64-NEXT: call callee_with_args@plt
@ -129,10 +126,7 @@ define void @stack_alloc(i32 signext %size) {
; RV64-WITHFP-NEXT: slli a0, a0, 32
; RV64-WITHFP-NEXT: srli a0, a0, 32
; RV64-WITHFP-NEXT: addi a0, a0, 15
; RV64-WITHFP-NEXT: addi a1, zero, 1
; RV64-WITHFP-NEXT: slli a1, a1, 33
; RV64-WITHFP-NEXT: addi a1, a1, -16
; RV64-WITHFP-NEXT: and a0, a0, a1
; RV64-WITHFP-NEXT: andi a0, a0, -16
; RV64-WITHFP-NEXT: sub a0, sp, a0
; RV64-WITHFP-NEXT: mv sp, a0
; RV64-WITHFP-NEXT: call callee_with_args@plt

View File

@ -122,10 +122,7 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: addiw a2, a2, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 33
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 15
; RV64I-NEXT: lui a2, 1048568
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: or a0, a0, a1

View File

@ -10,10 +10,8 @@ define signext i32 @gorc1_i32(i32 signext %a) nounwind {
; RV64I-LABEL: gorc1_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 1
; RV64I-NEXT: lui a2, 171
; RV64I-NEXT: addiw a2, a2, -1365
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -1366
; RV64I-NEXT: lui a2, 699051
; RV64I-NEXT: addiw a2, a2, -1366
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 1
; RV64I-NEXT: lui a3, 349525
@ -91,10 +89,8 @@ define signext i32 @gorc2_i32(i32 signext %a) nounwind {
; RV64I-LABEL: gorc2_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a2, 205
; RV64I-NEXT: addiw a2, a2, -819
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -820
; RV64I-NEXT: lui a2, 838861
; RV64I-NEXT: addiw a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 2
; RV64I-NEXT: lui a3, 209715
@ -182,10 +178,8 @@ define signext i32 @gorc3_i32(i32 signext %a) nounwind {
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a2, 205
; RV64I-NEXT: addiw a2, a2, -819
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -820
; RV64I-NEXT: lui a2, 838861
; RV64I-NEXT: addiw a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 2
; RV64I-NEXT: lui a3, 209715
@ -297,10 +291,8 @@ define signext i32 @gorc4_i32(i32 signext %a) nounwind {
; RV64I-LABEL: gorc4_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 4
; RV64I-NEXT: lui a3, 61681
@ -388,10 +380,8 @@ define signext i32 @gorc5_i32(i32 signext %a) nounwind {
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 4
; RV64I-NEXT: lui a3, 61681
@ -513,10 +503,8 @@ define signext i32 @gorc6_i32(i32 signext %a) nounwind {
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 4
; RV64I-NEXT: lui a3, 61681
@ -648,10 +636,8 @@ define signext i32 @gorc7_i32(i32 signext %a) nounwind {
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 4
; RV64I-NEXT: lui a3, 61681
@ -797,10 +783,8 @@ define signext i32 @gorc8_i32(i32 signext %a) nounwind {
; RV64I-LABEL: gorc8_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -255
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: addi a2, a2, -256
; RV64I-NEXT: lui a2, 1044496
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 8
; RV64I-NEXT: lui a3, 4080
@ -1015,20 +999,16 @@ define signext i32 @gorc2b_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 838861
; RV64I-NEXT: addiw a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 2
; RV64I-NEXT: lui a3, 209715
; RV64I-NEXT: addiw a3, a3, 819
; RV64I-NEXT: and a2, a2, a3
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: srli a3, a0, 2
; RV64I-NEXT: lui a4, 209715
; RV64I-NEXT: addiw a4, a4, 819
; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a2, 205
; RV64I-NEXT: addiw a2, a2, -819
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 2
; RV64I-NEXT: and a2, a2, a3
; RV64I-NEXT: and a2, a2, a4
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
@ -1124,30 +1104,26 @@ define signext i32 @gorc3b_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 699051
; RV64I-NEXT: addiw a2, a2, -1366
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 1
; RV64I-NEXT: lui a3, 349525
; RV64I-NEXT: addiw a3, a3, 1365
; RV64I-NEXT: and a2, a2, a3
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: srli a3, a0, 1
; RV64I-NEXT: lui a4, 349525
; RV64I-NEXT: addiw a4, a4, 1365
; RV64I-NEXT: and a3, a3, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a2, 838861
; RV64I-NEXT: addiw a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 2
; RV64I-NEXT: lui a4, 209715
; RV64I-NEXT: addiw a4, a4, 819
; RV64I-NEXT: and a2, a2, a4
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: lui a3, 838861
; RV64I-NEXT: addiw a3, a3, -820
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: srli a3, a0, 2
; RV64I-NEXT: lui a5, 209715
; RV64I-NEXT: addiw a5, a5, 819
; RV64I-NEXT: and a3, a3, a5
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: slli a1, a0, 1
; RV64I-NEXT: lui a2, 171
; RV64I-NEXT: addiw a2, a2, -1365
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -1366
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 1
; RV64I-NEXT: and a2, a2, a3
; RV64I-NEXT: and a2, a2, a4
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
@ -1319,10 +1295,8 @@ define signext i32 @grev1_i32(i32 signext %a) nounwind {
; RV64I-LABEL: grev1_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 1
; RV64I-NEXT: lui a2, 171
; RV64I-NEXT: addiw a2, a2, -1365
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -1366
; RV64I-NEXT: lui a2, 699051
; RV64I-NEXT: addiw a2, a2, -1366
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 1
; RV64I-NEXT: lui a2, 349525
@ -1396,10 +1370,8 @@ define signext i32 @grev2_i32(i32 signext %a) nounwind {
; RV64I-LABEL: grev2_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a2, 205
; RV64I-NEXT: addiw a2, a2, -819
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -820
; RV64I-NEXT: lui a2, 838861
; RV64I-NEXT: addiw a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: lui a2, 209715
@ -1482,10 +1454,8 @@ define signext i32 @grev3_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a2, 205
; RV64I-NEXT: addiw a2, a2, -819
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -820
; RV64I-NEXT: lui a2, 838861
; RV64I-NEXT: addiw a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: lui a2, 209715
@ -1590,10 +1560,8 @@ define signext i32 @grev4_i32(i32 signext %a) nounwind {
; RV64I-LABEL: grev4_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 4
; RV64I-NEXT: lui a2, 61681
@ -1676,10 +1644,8 @@ define signext i32 @grev5_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 4
; RV64I-NEXT: lui a2, 61681
@ -1794,10 +1760,8 @@ define signext i32 @grev6_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 4
; RV64I-NEXT: lui a2, 61681
@ -1920,10 +1884,8 @@ define signext i32 @grev7_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 4
; RV64I-NEXT: lui a2, 241
; RV64I-NEXT: addiw a2, a2, -241
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 240
; RV64I-NEXT: lui a2, 986895
; RV64I-NEXT: addiw a2, a2, 240
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 4
; RV64I-NEXT: lui a2, 61681
@ -2059,10 +2021,8 @@ define signext i32 @grev8_i32(i32 signext %a) nounwind {
; RV64I-LABEL: grev8_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -255
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: addi a2, a2, -256
; RV64I-NEXT: lui a2, 1044496
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 8
; RV64I-NEXT: lui a2, 4080
@ -2273,10 +2233,8 @@ define signext i32 @grev3b_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 1
; RV64I-NEXT: lui a2, 171
; RV64I-NEXT: addiw a2, a2, -1365
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -1366
; RV64I-NEXT: lui a2, 699051
; RV64I-NEXT: addiw a2, a2, -1366
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 1
; RV64I-NEXT: lui a2, 349525
@ -2386,27 +2344,23 @@ define signext i32 @grev2b_i32(i32 signext %a) nounwind {
; RV64I-NEXT: addiw a2, a2, -1366
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a3, 838861
; RV64I-NEXT: addiw a3, a3, -820
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: lui a3, 209715
; RV64I-NEXT: addiw a3, a3, 819
; RV64I-NEXT: lui a3, 349525
; RV64I-NEXT: addiw a3, a3, 1365
; RV64I-NEXT: and a0, a0, a3
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a4, 838861
; RV64I-NEXT: addiw a4, a4, -820
; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: lui a4, 209715
; RV64I-NEXT: addiw a4, a4, 819
; RV64I-NEXT: and a0, a0, a4
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 1
; RV64I-NEXT: lui a3, 171
; RV64I-NEXT: addiw a3, a3, -1365
; RV64I-NEXT: slli a3, a3, 12
; RV64I-NEXT: addi a3, a3, -1366
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a0, a0, a3
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
@ -2536,9 +2490,9 @@ define signext i32 @grev0_i32(i32 signext %a) nounwind {
; RV64I-NEXT: addiw a4, a4, -820
; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: lui a4, 209715
; RV64I-NEXT: addiw a4, a4, 819
; RV64I-NEXT: and a0, a0, a4
; RV64I-NEXT: lui a5, 209715
; RV64I-NEXT: addiw a5, a5, 819
; RV64I-NEXT: and a0, a0, a5
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 1
; RV64I-NEXT: and a1, a1, a2
@ -2546,13 +2500,9 @@ define signext i32 @grev0_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a0, a0, a3
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: slli a1, a0, 2
; RV64I-NEXT: lui a2, 205
; RV64I-NEXT: addiw a2, a2, -819
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, -820
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: and a1, a1, a4
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a4
; RV64I-NEXT: and a0, a0, a5
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
@ -3196,10 +3146,7 @@ define i32 @bswap_rotr_i32(i32 %a) {
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: or a0, a0, a1
@ -3252,10 +3199,7 @@ define i32 @bswap_rotl_i32(i32 %a) {
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: or a0, a1, a0

View File

@ -360,10 +360,7 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
; RV64I-NEXT: srli a1, a1, 57
; RV64I-NEXT: andi a1, a1, 63
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: addi a2, a2, -64
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: andi a1, a1, -64
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
@ -373,10 +370,7 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
; RV64IM-NEXT: srli a1, a1, 57
; RV64IM-NEXT: andi a1, a1, 63
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: addi a2, zero, 1
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: addi a2, a2, -64
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: andi a1, a1, -64
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
%1 = srem i32 %x, 64
@ -420,11 +414,9 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: addiw a2, a2, -1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addiw a3, a2, -1
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 31
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
@ -434,11 +426,9 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
; RV64IM-NEXT: sext.w a1, a0
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: lui a2, 524288
; RV64IM-NEXT: addiw a2, a2, -1
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: addiw a3, a2, -1
; RV64IM-NEXT: and a1, a1, a3
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: addi a2, zero, 1
; RV64IM-NEXT: slli a2, a2, 31
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: addw a0, a0, a1
; RV64IM-NEXT: ret

View File

@ -677,19 +677,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-NEXT: lh a1, 4(a1)
; RV32I-NEXT: srli a4, a2, 26
; RV32I-NEXT: add a4, a2, a4
; RV32I-NEXT: lui a6, 16
; RV32I-NEXT: addi a5, a6, -64
; RV32I-NEXT: and a4, a4, a5
; RV32I-NEXT: andi a4, a4, -64
; RV32I-NEXT: sub s2, a2, a4
; RV32I-NEXT: srli a2, a1, 27
; RV32I-NEXT: add a2, a1, a2
; RV32I-NEXT: addi a4, a6, -32
; RV32I-NEXT: and a2, a2, a4
; RV32I-NEXT: andi a2, a2, -32
; RV32I-NEXT: sub s3, a1, a2
; RV32I-NEXT: srli a1, a3, 29
; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: addi a2, a6, -8
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: andi a1, a1, -8
; RV32I-NEXT: sub s1, a3, a1
; RV32I-NEXT: addi a1, zero, 95
; RV32I-NEXT: call __modsi3@plt
@ -707,7 +703,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: dont_fold_srem_power_of_two:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lh a6, 8(a1)
; RV32IM-NEXT: lh a2, 8(a1)
; RV32IM-NEXT: lh a3, 4(a1)
; RV32IM-NEXT: lh a4, 12(a1)
; RV32IM-NEXT: lh a1, 0(a1)
@ -715,32 +711,28 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV32IM-NEXT: addi a5, a5, 389
; RV32IM-NEXT: mulh a5, a4, a5
; RV32IM-NEXT: add a5, a5, a4
; RV32IM-NEXT: srli a2, a5, 31
; RV32IM-NEXT: srli a6, a5, 31
; RV32IM-NEXT: srli a5, a5, 6
; RV32IM-NEXT: add a2, a5, a2
; RV32IM-NEXT: add a6, a5, a6
; RV32IM-NEXT: addi a5, zero, 95
; RV32IM-NEXT: mul a2, a2, a5
; RV32IM-NEXT: sub a7, a4, a2
; RV32IM-NEXT: srli a4, a1, 26
; RV32IM-NEXT: add a4, a1, a4
; RV32IM-NEXT: lui a5, 16
; RV32IM-NEXT: addi a2, a5, -64
; RV32IM-NEXT: and a2, a4, a2
; RV32IM-NEXT: sub a1, a1, a2
; RV32IM-NEXT: srli a2, a3, 27
; RV32IM-NEXT: add a2, a3, a2
; RV32IM-NEXT: addi a4, a5, -32
; RV32IM-NEXT: and a2, a2, a4
; RV32IM-NEXT: sub a2, a3, a2
; RV32IM-NEXT: srli a3, a6, 29
; RV32IM-NEXT: add a3, a6, a3
; RV32IM-NEXT: addi a4, a5, -8
; RV32IM-NEXT: and a3, a3, a4
; RV32IM-NEXT: sub a3, a6, a3
; RV32IM-NEXT: sh a3, 4(a0)
; RV32IM-NEXT: sh a2, 2(a0)
; RV32IM-NEXT: mul a5, a6, a5
; RV32IM-NEXT: sub a4, a4, a5
; RV32IM-NEXT: srli a5, a1, 26
; RV32IM-NEXT: add a5, a1, a5
; RV32IM-NEXT: andi a5, a5, -64
; RV32IM-NEXT: sub a1, a1, a5
; RV32IM-NEXT: srli a5, a3, 27
; RV32IM-NEXT: add a5, a3, a5
; RV32IM-NEXT: andi a5, a5, -32
; RV32IM-NEXT: sub a3, a3, a5
; RV32IM-NEXT: srli a5, a2, 29
; RV32IM-NEXT: add a5, a2, a5
; RV32IM-NEXT: andi a5, a5, -8
; RV32IM-NEXT: sub a2, a2, a5
; RV32IM-NEXT: sh a2, 4(a0)
; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: sh a1, 0(a0)
; RV32IM-NEXT: sh a7, 6(a0)
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_srem_power_of_two:
@ -758,19 +750,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV64I-NEXT: lh a1, 8(a1)
; RV64I-NEXT: srli a4, a2, 58
; RV64I-NEXT: add a4, a2, a4
; RV64I-NEXT: lui a6, 16
; RV64I-NEXT: addiw a5, a6, -64
; RV64I-NEXT: and a4, a4, a5
; RV64I-NEXT: andi a4, a4, -64
; RV64I-NEXT: sub s2, a2, a4
; RV64I-NEXT: srli a2, a1, 59
; RV64I-NEXT: add a2, a1, a2
; RV64I-NEXT: addiw a4, a6, -32
; RV64I-NEXT: and a2, a2, a4
; RV64I-NEXT: andi a2, a2, -32
; RV64I-NEXT: sub s3, a1, a2
; RV64I-NEXT: srli a1, a3, 61
; RV64I-NEXT: add a1, a3, a1
; RV64I-NEXT: addiw a2, a6, -8
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: andi a1, a1, -8
; RV64I-NEXT: sub s1, a3, a1
; RV64I-NEXT: addi a1, zero, 95
; RV64I-NEXT: call __moddi3@plt
@ -807,27 +795,23 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV64IM-NEXT: add a2, a5, a2
; RV64IM-NEXT: addi a5, zero, 95
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: sub a7, a1, a2
; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: srli a2, a4, 58
; RV64IM-NEXT: add a2, a4, a2
; RV64IM-NEXT: lui a5, 16
; RV64IM-NEXT: addiw a1, a5, -64
; RV64IM-NEXT: and a1, a2, a1
; RV64IM-NEXT: sub a1, a4, a1
; RV64IM-NEXT: srli a2, a3, 59
; RV64IM-NEXT: add a2, a3, a2
; RV64IM-NEXT: addiw a4, a5, -32
; RV64IM-NEXT: and a2, a2, a4
; RV64IM-NEXT: sub a2, a3, a2
; RV64IM-NEXT: srli a3, a6, 61
; RV64IM-NEXT: add a3, a6, a3
; RV64IM-NEXT: addiw a4, a5, -8
; RV64IM-NEXT: and a3, a3, a4
; RV64IM-NEXT: sub a3, a6, a3
; RV64IM-NEXT: sh a3, 4(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: sh a1, 0(a0)
; RV64IM-NEXT: sh a7, 6(a0)
; RV64IM-NEXT: andi a2, a2, -64
; RV64IM-NEXT: sub a2, a4, a2
; RV64IM-NEXT: srli a4, a3, 59
; RV64IM-NEXT: add a4, a3, a4
; RV64IM-NEXT: andi a4, a4, -32
; RV64IM-NEXT: sub a3, a3, a4
; RV64IM-NEXT: srli a4, a6, 61
; RV64IM-NEXT: add a4, a6, a4
; RV64IM-NEXT: andi a4, a4, -8
; RV64IM-NEXT: sub a4, a6, a4
; RV64IM-NEXT: sh a4, 4(a0)
; RV64IM-NEXT: sh a3, 2(a0)
; RV64IM-NEXT: sh a2, 0(a0)
; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
ret <4 x i16> %1

View File

@ -51,10 +51,7 @@ define void @caller(i32 %n) {
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: addi a0, a0, 15
; RV64I-NEXT: addi a1, zero, 1
; RV64I-NEXT: slli a1, a1, 33
; RV64I-NEXT: addi a1, a1, -16
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: mv sp, a0
; RV64I-NEXT: mv a1, s1

View File

@ -361,10 +361,7 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a1, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: srli a0, a0, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 15
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, zero, 1
; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 33
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, -16
; LP64-LP64F-LP64D-FPELIM-NEXT: and a0, a0, a1
; LP64-LP64F-LP64D-FPELIM-NEXT: andi a0, a0, -16
; LP64-LP64F-LP64D-FPELIM-NEXT: sub a0, sp, a0
; LP64-LP64F-LP64D-FPELIM-NEXT: mv sp, a0
; LP64-LP64F-LP64D-FPELIM-NEXT: call notdead@plt
@ -396,10 +393,7 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a1, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: srli a0, a0, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 15
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, zero, 1
; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 33
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, -16
; LP64-LP64F-LP64D-WITHFP-NEXT: and a0, a0, a1
; LP64-LP64F-LP64D-WITHFP-NEXT: andi a0, a0, -16
; LP64-LP64F-LP64D-WITHFP-NEXT: sub a0, sp, a0
; LP64-LP64F-LP64D-WITHFP-NEXT: mv sp, a0
; LP64-LP64F-LP64D-WITHFP-NEXT: call notdead@plt