[ARM] Implement target hook function to decide folding (mul (add x, c1), c2)

Prevent the folding in DAGCombine if it leads to worse code.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D109124
This commit is contained in:
Ben Shi 2021-09-07 10:21:38 +08:00
parent 20f890696f
commit 63ca9371c7
5 changed files with 158 additions and 213 deletions

View File

@ -18793,6 +18793,31 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return AbsImm >= 0 && AbsImm <= 255;
}
// Return false to prevent folding
// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
// if the folding leads to worse code.
bool ARMTargetLowering::isMulAddWithConstProfitable(
const SDValue &AddNode, const SDValue &ConstNode) const {
// Let the DAGCombiner decide for vector types and large types.
const EVT VT = AddNode.getValueType();
if (VT.isVector() || VT.getScalarSizeInBits() > 32)
return true;
// It is worse if c0 is legal add immediate, while c1*c0 is not
// and has to be composed by at least two instructions.
const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
const int64_t C0 = C0Node->getSExtValue();
APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
return true;
if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
return false;
// Default to true and let the DAGCombiner decide.
return true;
}
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,

View File

@ -712,6 +712,9 @@ class VectorType;
Align Alignment,
const DataLayout &DL) const;
/// Decide whether DAGCombine may fold
/// (mul (add r, c0), c1) into (add (mul r, c1), c0*c1).
/// \p AddNode is the add feeding the multiply and \p ConstNode is the
/// multiplier constant. Returns false to prevent the fold when it would
/// lead to worse code, true to let the DAGCombiner decide.
bool isMulAddWithConstProfitable(const SDValue &AddNode,
const SDValue &ConstNode) const override;
bool alignLoopsWithOptSize() const override;
/// Returns the number of interleaved accesses that will be generated when

View File

@ -243,10 +243,9 @@ define i16 @fold_sub301_mul19_i16(i16 %a) {
define i32 @fold_add251_mul253_i32(i32 %a) {
; CHECK-ARMV6-LABEL: fold_add251_mul253_i32:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mov r1, #15
; CHECK-ARMV6-NEXT: mov r2, #253
; CHECK-ARMV6-NEXT: orr r1, r1, #63488
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: add r0, r0, #251
; CHECK-ARMV6-NEXT: mov r1, #253
; CHECK-ARMV6-NEXT: mul r0, r0, r1
; CHECK-ARMV6-NEXT: bx lr
;
; CHECK-ARMV7-LABEL: fold_add251_mul253_i32:
@ -258,15 +257,10 @@ define i32 @fold_add251_mul253_i32(i32 %a) {
;
; CHECK-THUMBV6M-LABEL: fold_add251_mul253_i32:
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: adds r0, #251
; CHECK-THUMBV6M-NEXT: movs r1, #253
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI8_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: muls r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI8_0:
; CHECK-THUMBV6M-NEXT: .long 63503 @ 0xf80f
;
; CHECK-THUMBV7M-LABEL: fold_add251_mul253_i32:
; CHECK-THUMBV7M: @ %bb.0:
@ -282,9 +276,9 @@ define i32 @fold_add251_mul253_i32(i32 %a) {
define i16 @fold_add251_mul253_i16(i16 %a) {
; CHECK-ARMV6-LABEL: fold_add251_mul253_i16:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mvn r1, #2032
; CHECK-ARMV6-NEXT: mov r2, #253
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: add r0, r0, #251
; CHECK-ARMV6-NEXT: mov r1, #253
; CHECK-ARMV6-NEXT: mul r0, r0, r1
; CHECK-ARMV6-NEXT: bx lr
;
; CHECK-ARMV7-LABEL: fold_add251_mul253_i16:
@ -297,15 +291,10 @@ define i16 @fold_add251_mul253_i16(i16 %a) {
;
; CHECK-THUMBV6M-LABEL: fold_add251_mul253_i16:
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: adds r0, #251
; CHECK-THUMBV6M-NEXT: movs r1, #253
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI9_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: muls r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI9_0:
; CHECK-THUMBV6M-NEXT: .long 4294965263 @ 0xfffff80f
;
; CHECK-THUMBV7M-LABEL: fold_add251_mul253_i16:
; CHECK-THUMBV7M: @ %bb.0:
@ -319,41 +308,19 @@ define i16 @fold_add251_mul253_i16(i16 %a) {
}
define i32 @fold_sub251_mul253_i32(i32 %a) {
; CHECK-ARMV6-LABEL: fold_sub251_mul253_i32:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mvn r1, #14
; CHECK-ARMV6-NEXT: mov r2, #253
; CHECK-ARMV6-NEXT: sub r1, r1, #63488
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: bx lr
; CHECK-ARM-LABEL: fold_sub251_mul253_i32:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: sub r0, r0, #251
; CHECK-ARM-NEXT: mov r1, #253
; CHECK-ARM-NEXT: mul r0, r0, r1
; CHECK-ARM-NEXT: bx lr
;
; CHECK-ARMV7-LABEL: fold_sub251_mul253_i32:
; CHECK-ARMV7: @ %bb.0:
; CHECK-ARMV7-NEXT: mov r1, #253
; CHECK-ARMV7-NEXT: mul r0, r0, r1
; CHECK-ARMV7-NEXT: movw r1, #63503
; CHECK-ARMV7-NEXT: sub r0, r0, r1
; CHECK-ARMV7-NEXT: bx lr
;
; CHECK-THUMBV6M-LABEL: fold_sub251_mul253_i32:
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: movs r1, #253
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI10_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI10_0:
; CHECK-THUMBV6M-NEXT: .long 4294903793 @ 0xffff07f1
;
; CHECK-THUMBV7M-LABEL: fold_sub251_mul253_i32:
; CHECK-THUMBV7M: @ %bb.0:
; CHECK-THUMBV7M-NEXT: movs r1, #253
; CHECK-THUMBV7M-NEXT: muls r0, r1, r0
; CHECK-THUMBV7M-NEXT: movw r1, #63503
; CHECK-THUMBV7M-NEXT: subs r0, r0, r1
; CHECK-THUMBV7M-NEXT: bx lr
; CHECK-THUMB-LABEL: fold_sub251_mul253_i32:
; CHECK-THUMB: @ %bb.0:
; CHECK-THUMB-NEXT: subs r0, #251
; CHECK-THUMB-NEXT: movs r1, #253
; CHECK-THUMB-NEXT: muls r0, r1, r0
; CHECK-THUMB-NEXT: bx lr
%b = add i32 %a, -251
%c = mul i32 %b, 253
ret i32 %c
@ -362,10 +329,9 @@ define i32 @fold_sub251_mul253_i32(i32 %a) {
define i16 @fold_sub251_mul253_i16(i16 %a) {
; CHECK-ARMV6-LABEL: fold_sub251_mul253_i16:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mov r1, #241
; CHECK-ARMV6-NEXT: mov r2, #253
; CHECK-ARMV6-NEXT: orr r1, r1, #1792
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: sub r0, r0, #251
; CHECK-ARMV6-NEXT: mov r1, #253
; CHECK-ARMV6-NEXT: mul r0, r0, r1
; CHECK-ARMV6-NEXT: bx lr
;
; CHECK-ARMV7-LABEL: fold_sub251_mul253_i16:
@ -377,15 +343,10 @@ define i16 @fold_sub251_mul253_i16(i16 %a) {
;
; CHECK-THUMBV6M-LABEL: fold_sub251_mul253_i16:
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: subs r0, #251
; CHECK-THUMBV6M-NEXT: movs r1, #253
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI11_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: muls r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI11_0:
; CHECK-THUMBV6M-NEXT: .long 2033 @ 0x7f1
;
; CHECK-THUMBV7M-LABEL: fold_sub251_mul253_i16:
; CHECK-THUMBV7M: @ %bb.0:
@ -401,43 +362,32 @@ define i16 @fold_sub251_mul253_i16(i16 %a) {
define i32 @fold_add251_mul353_i32(i32 %a) {
; CHECK-ARMV6-LABEL: fold_add251_mul353_i32:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mov r2, #97
; CHECK-ARMV6-NEXT: ldr r1, .LCPI12_0
; CHECK-ARMV6-NEXT: orr r2, r2, #256
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: mov r1, #97
; CHECK-ARMV6-NEXT: add r0, r0, #251
; CHECK-ARMV6-NEXT: orr r1, r1, #256
; CHECK-ARMV6-NEXT: mul r0, r0, r1
; CHECK-ARMV6-NEXT: bx lr
; CHECK-ARMV6-NEXT: .p2align 2
; CHECK-ARMV6-NEXT: @ %bb.1:
; CHECK-ARMV6-NEXT: .LCPI12_0:
; CHECK-ARMV6-NEXT: .long 88603 @ 0x15a1b
;
; CHECK-ARMV7-LABEL: fold_add251_mul353_i32:
; CHECK-ARMV7: @ %bb.0:
; CHECK-ARMV7-NEXT: movw r1, #23067
; CHECK-ARMV7-NEXT: movw r2, #353
; CHECK-ARMV7-NEXT: movt r1, #1
; CHECK-ARMV7-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV7-NEXT: add r0, r0, #251
; CHECK-ARMV7-NEXT: movw r1, #353
; CHECK-ARMV7-NEXT: mul r0, r0, r1
; CHECK-ARMV7-NEXT: bx lr
;
; CHECK-THUMBV6M-LABEL: fold_add251_mul353_i32:
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: movs r1, #255
; CHECK-THUMBV6M-NEXT: adds r1, #98
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI12_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: adds r0, #251
; CHECK-THUMBV6M-NEXT: muls r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI12_0:
; CHECK-THUMBV6M-NEXT: .long 88603 @ 0x15a1b
;
; CHECK-THUMBV7M-LABEL: fold_add251_mul353_i32:
; CHECK-THUMBV7M: @ %bb.0:
; CHECK-THUMBV7M-NEXT: movw r1, #23067
; CHECK-THUMBV7M-NEXT: movw r2, #353
; CHECK-THUMBV7M-NEXT: movt r1, #1
; CHECK-THUMBV7M-NEXT: mla r0, r0, r2, r1
; CHECK-THUMBV7M-NEXT: adds r0, #251
; CHECK-THUMBV7M-NEXT: movw r1, #353
; CHECK-THUMBV7M-NEXT: muls r0, r1, r0
; CHECK-THUMBV7M-NEXT: bx lr
%b = add i32 %a, 251
%c = mul i32 %b, 353
@ -447,11 +397,10 @@ define i32 @fold_add251_mul353_i32(i32 %a) {
define i16 @fold_add251_mul353_i16(i16 %a) {
; CHECK-ARMV6-LABEL: fold_add251_mul353_i16:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mov r2, #97
; CHECK-ARMV6-NEXT: mov r1, #27
; CHECK-ARMV6-NEXT: orr r2, r2, #256
; CHECK-ARMV6-NEXT: orr r1, r1, #23040
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: mov r1, #97
; CHECK-ARMV6-NEXT: add r0, r0, #251
; CHECK-ARMV6-NEXT: orr r1, r1, #256
; CHECK-ARMV6-NEXT: mul r0, r0, r1
; CHECK-ARMV6-NEXT: bx lr
;
; CHECK-ARMV7-LABEL: fold_add251_mul353_i16:
@ -465,14 +414,9 @@ define i16 @fold_add251_mul353_i16(i16 %a) {
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: movs r1, #255
; CHECK-THUMBV6M-NEXT: adds r1, #98
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI13_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: adds r0, #251
; CHECK-THUMBV6M-NEXT: muls r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI13_0:
; CHECK-THUMBV6M-NEXT: .long 23067 @ 0x5a1b
;
; CHECK-THUMBV7M-LABEL: fold_add251_mul353_i16:
; CHECK-THUMBV7M: @ %bb.0:
@ -488,43 +432,32 @@ define i16 @fold_add251_mul353_i16(i16 %a) {
define i32 @fold_sub251_mul353_i32(i32 %a) {
; CHECK-ARMV6-LABEL: fold_sub251_mul353_i32:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mov r2, #97
; CHECK-ARMV6-NEXT: ldr r1, .LCPI14_0
; CHECK-ARMV6-NEXT: orr r2, r2, #256
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: mov r1, #97
; CHECK-ARMV6-NEXT: sub r0, r0, #251
; CHECK-ARMV6-NEXT: orr r1, r1, #256
; CHECK-ARMV6-NEXT: mul r0, r0, r1
; CHECK-ARMV6-NEXT: bx lr
; CHECK-ARMV6-NEXT: .p2align 2
; CHECK-ARMV6-NEXT: @ %bb.1:
; CHECK-ARMV6-NEXT: .LCPI14_0:
; CHECK-ARMV6-NEXT: .long 4294878693 @ 0xfffea5e5
;
; CHECK-ARMV7-LABEL: fold_sub251_mul353_i32:
; CHECK-ARMV7: @ %bb.0:
; CHECK-ARMV7-NEXT: movw r1, #42469
; CHECK-ARMV7-NEXT: movw r2, #353
; CHECK-ARMV7-NEXT: movt r1, #65534
; CHECK-ARMV7-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV7-NEXT: sub r0, r0, #251
; CHECK-ARMV7-NEXT: movw r1, #353
; CHECK-ARMV7-NEXT: mul r0, r0, r1
; CHECK-ARMV7-NEXT: bx lr
;
; CHECK-THUMBV6M-LABEL: fold_sub251_mul353_i32:
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: movs r1, #255
; CHECK-THUMBV6M-NEXT: adds r1, #98
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI14_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: subs r0, #251
; CHECK-THUMBV6M-NEXT: muls r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI14_0:
; CHECK-THUMBV6M-NEXT: .long 4294878693 @ 0xfffea5e5
;
; CHECK-THUMBV7M-LABEL: fold_sub251_mul353_i32:
; CHECK-THUMBV7M: @ %bb.0:
; CHECK-THUMBV7M-NEXT: movw r1, #42469
; CHECK-THUMBV7M-NEXT: movw r2, #353
; CHECK-THUMBV7M-NEXT: movt r1, #65534
; CHECK-THUMBV7M-NEXT: mla r0, r0, r2, r1
; CHECK-THUMBV7M-NEXT: subs r0, #251
; CHECK-THUMBV7M-NEXT: movw r1, #353
; CHECK-THUMBV7M-NEXT: muls r0, r1, r0
; CHECK-THUMBV7M-NEXT: bx lr
%b = add i32 %a, -251
%c = mul i32 %b, 353
@ -534,11 +467,10 @@ define i32 @fold_sub251_mul353_i32(i32 %a) {
define i16 @fold_sub251_mul353_i16(i16 %a) {
; CHECK-ARMV6-LABEL: fold_sub251_mul353_i16:
; CHECK-ARMV6: @ %bb.0:
; CHECK-ARMV6-NEXT: mov r2, #97
; CHECK-ARMV6-NEXT: mvn r1, #26
; CHECK-ARMV6-NEXT: orr r2, r2, #256
; CHECK-ARMV6-NEXT: sub r1, r1, #23040
; CHECK-ARMV6-NEXT: mla r0, r0, r2, r1
; CHECK-ARMV6-NEXT: mov r1, #97
; CHECK-ARMV6-NEXT: sub r0, r0, #251
; CHECK-ARMV6-NEXT: orr r1, r1, #256
; CHECK-ARMV6-NEXT: mul r0, r0, r1
; CHECK-ARMV6-NEXT: bx lr
;
; CHECK-ARMV7-LABEL: fold_sub251_mul353_i16:
@ -553,14 +485,9 @@ define i16 @fold_sub251_mul353_i16(i16 %a) {
; CHECK-THUMBV6M: @ %bb.0:
; CHECK-THUMBV6M-NEXT: movs r1, #255
; CHECK-THUMBV6M-NEXT: adds r1, #98
; CHECK-THUMBV6M-NEXT: muls r1, r0, r1
; CHECK-THUMBV6M-NEXT: ldr r0, .LCPI15_0
; CHECK-THUMBV6M-NEXT: adds r0, r1, r0
; CHECK-THUMBV6M-NEXT: subs r0, #251
; CHECK-THUMBV6M-NEXT: muls r0, r1, r0
; CHECK-THUMBV6M-NEXT: bx lr
; CHECK-THUMBV6M-NEXT: .p2align 2
; CHECK-THUMBV6M-NEXT: @ %bb.1:
; CHECK-THUMBV6M-NEXT: .LCPI15_0:
; CHECK-THUMBV6M-NEXT: .long 4294944229 @ 0xffffa5e5
;
; CHECK-THUMBV7M-LABEL: fold_sub251_mul353_i16:
; CHECK-THUMBV7M: @ %bb.0:

View File

@ -329,85 +329,81 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind {
define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; ARM5-LABEL: test_urem_vec:
; ARM5: @ %bb.0:
; ARM5-NEXT: push {r4, r5, r11, lr}
; ARM5-NEXT: mov r3, #183
; ARM5-NEXT: mvn r12, #182
; ARM5-NEXT: orr r3, r3, #1280
; ARM5-NEXT: sub r12, r12, #1280
; ARM5-NEXT: mov r4, #51
; ARM5-NEXT: mla lr, r1, r3, r12
; ARM5-NEXT: mov r12, #255
; ARM5-NEXT: orr r12, r12, #1792
; ARM5-NEXT: orr r4, r4, #768
; ARM5-NEXT: mov r3, #0
; ARM5-NEXT: and r1, lr, r12
; ARM5-NEXT: mvn lr, #101
; ARM5-NEXT: sub lr, lr, #1536
; ARM5-NEXT: cmp r1, #292
; ARM5-NEXT: mla r5, r2, r4, lr
; ARM5-NEXT: mov r1, #0
; ARM5-NEXT: movhi r1, #1
; ARM5-NEXT: and r2, r5, r12
; ARM5-NEXT: mov r5, #171
; ARM5-NEXT: orr r5, r5, #512
; ARM5-NEXT: cmp r2, #1
; ARM5-NEXT: mov r2, #0
; ARM5-NEXT: mul r4, r0, r5
; ARM5-NEXT: push {r4, lr}
; ARM5-NEXT: mov r3, #171
; ARM5-NEXT: orr r3, r3, #512
; ARM5-NEXT: mul r12, r0, r3
; ARM5-NEXT: mov r0, #1020
; ARM5-NEXT: orr r0, r0, #1024
; ARM5-NEXT: mov r5, #254
; ARM5-NEXT: movhi r2, #1
; ARM5-NEXT: orr r5, r5, #1792
; ARM5-NEXT: and r0, r4, r0
; ARM5-NEXT: mov r3, #254
; ARM5-NEXT: orr r3, r3, #1792
; ARM5-NEXT: and r0, r12, r0
; ARM5-NEXT: lsr r0, r0, #1
; ARM5-NEXT: orr r0, r0, r4, lsl #10
; ARM5-NEXT: and r0, r0, r5
; ARM5-NEXT: orr r0, r0, r12, lsl #10
; ARM5-NEXT: sub r12, r1, #1
; ARM5-NEXT: mov r1, #183
; ARM5-NEXT: and r0, r0, r3
; ARM5-NEXT: orr r1, r1, #1280
; ARM5-NEXT: mov r3, #0
; ARM5-NEXT: lsr r0, r0, #1
; ARM5-NEXT: cmp r0, #170
; ARM5-NEXT: mul lr, r12, r1
; ARM5-NEXT: mov r12, #255
; ARM5-NEXT: orr r12, r12, #1792
; ARM5-NEXT: mov r0, #0
; ARM5-NEXT: movhi r0, #1
; ARM5-NEXT: and r1, lr, r12
; ARM5-NEXT: sub lr, r2, #2
; ARM5-NEXT: mov r2, #51
; ARM5-NEXT: cmp r1, #292
; ARM5-NEXT: orr r2, r2, #768
; ARM5-NEXT: mov r1, #0
; ARM5-NEXT: movhi r1, #1
; ARM5-NEXT: mul r4, lr, r2
; ARM5-NEXT: and r2, r4, r12
; ARM5-NEXT: cmp r2, #1
; ARM5-NEXT: movhi r3, #1
; ARM5-NEXT: mov r0, r3
; ARM5-NEXT: pop {r4, r5, r11, pc}
; ARM5-NEXT: mov r2, r3
; ARM5-NEXT: pop {r4, pc}
;
; ARM6-LABEL: test_urem_vec:
; ARM6: @ %bb.0:
; ARM6-NEXT: push {r4, lr}
; ARM6-NEXT: mov r4, #51
; ARM6-NEXT: mvn lr, #101
; ARM6-NEXT: orr r4, r4, #768
; ARM6-NEXT: sub lr, lr, #1536
; ARM6-NEXT: mov r3, #183
; ARM6-NEXT: mvn r12, #182
; ARM6-NEXT: mla r2, r2, r4, lr
; ARM6-NEXT: mov r4, #171
; ARM6-NEXT: orr r4, r4, #512
; ARM6-NEXT: orr r3, r3, #1280
; ARM6-NEXT: sub r12, r12, #1280
; ARM6-NEXT: mul r0, r0, r4
; ARM6-NEXT: mov r4, #1020
; ARM6-NEXT: orr r4, r4, #1024
; ARM6-NEXT: mla r1, r1, r3, r12
; ARM6-NEXT: push {r11, lr}
; ARM6-NEXT: mov r3, #171
; ARM6-NEXT: sub r12, r1, #1
; ARM6-NEXT: orr r3, r3, #512
; ARM6-NEXT: mov r1, #183
; ARM6-NEXT: orr r1, r1, #1280
; ARM6-NEXT: sub lr, r2, #2
; ARM6-NEXT: mul r0, r0, r3
; ARM6-NEXT: mov r3, #1020
; ARM6-NEXT: orr r3, r3, #1024
; ARM6-NEXT: mov r2, #51
; ARM6-NEXT: mul r1, r12, r1
; ARM6-NEXT: orr r2, r2, #768
; ARM6-NEXT: mov r12, #255
; ARM6-NEXT: and r3, r0, r3
; ARM6-NEXT: mul r2, lr, r2
; ARM6-NEXT: orr r12, r12, #1792
; ARM6-NEXT: lsr r3, r3, #1
; ARM6-NEXT: orr r0, r3, r0, lsl #10
; ARM6-NEXT: mov r3, #254
; ARM6-NEXT: and r1, r1, r12
; ARM6-NEXT: orr r3, r3, #1792
; ARM6-NEXT: and r0, r0, r3
; ARM6-NEXT: and r2, r2, r12
; ARM6-NEXT: mov r3, #0
; ARM6-NEXT: and r4, r0, r4
; ARM6-NEXT: lsr r4, r4, #1
; ARM6-NEXT: orr r0, r4, r0, lsl #10
; ARM6-NEXT: mov r4, #254
; ARM6-NEXT: and r1, r1, r12
; ARM6-NEXT: orr r4, r4, #1792
; ARM6-NEXT: lsr r0, r0, #1
; ARM6-NEXT: cmp r0, #170
; ARM6-NEXT: mov r0, #0
; ARM6-NEXT: movhi r0, #1
; ARM6-NEXT: cmp r1, #292
; ARM6-NEXT: mov r1, #0
; ARM6-NEXT: and r0, r0, r4
; ARM6-NEXT: movhi r1, #1
; ARM6-NEXT: cmp r2, #1
; ARM6-NEXT: mov r2, #0
; ARM6-NEXT: lsr r0, r0, #1
; ARM6-NEXT: movhi r2, #1
; ARM6-NEXT: cmp r0, #170
; ARM6-NEXT: movhi r3, #1
; ARM6-NEXT: mov r0, r3
; ARM6-NEXT: pop {r4, pc}
; ARM6-NEXT: mov r2, r3
; ARM6-NEXT: pop {r11, pc}
;
; ARM7-LABEL: test_urem_vec:
; ARM7: @ %bb.0:

View File

@ -128,26 +128,24 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: movs r0, r4
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: subs r1, r1, #1
; CHECK-NEXT: ldr r5, .LCPI4_3
; CHECK-NEXT: muls r5, r1, r5
; CHECK-NEXT: ldr r1, .LCPI4_4
; CHECK-NEXT: adds r1, r5, r1
; CHECK-NEXT: movs r5, #73
; CHECK-NEXT: lsls r5, r5, #23
; CHECK-NEXT: cmp r1, r5
; CHECK-NEXT: movs r1, #73
; CHECK-NEXT: lsls r1, r1, #23
; CHECK-NEXT: cmp r5, r1
; CHECK-NEXT: push {r3}
; CHECK-NEXT: pop {r1}
; CHECK-NEXT: bhi .LBB4_4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: movs r1, r4
; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: ldr r5, .LCPI4_5
; CHECK-NEXT: subs r2, r2, #2
; CHECK-NEXT: ldr r5, .LCPI4_4
; CHECK-NEXT: muls r5, r2, r5
; CHECK-NEXT: ldr r2, .LCPI4_6
; CHECK-NEXT: adds r2, r5, r2
; CHECK-NEXT: ldr r5, .LCPI4_7
; CHECK-NEXT: ands r5, r2
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: ldr r2, .LCPI4_5
; CHECK-NEXT: ands r2, r5
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: bhi .LBB4_6
; CHECK-NEXT: @ %bb.5:
; CHECK-NEXT: movs r3, r4
@ -167,12 +165,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: .LCPI4_3:
; CHECK-NEXT: .long 3068133376 @ 0xb6e00000
; CHECK-NEXT: .LCPI4_4:
; CHECK-NEXT: .long 1226833920 @ 0x49200000
; CHECK-NEXT: .LCPI4_5:
; CHECK-NEXT: .long 819 @ 0x333
; CHECK-NEXT: .LCPI4_6:
; CHECK-NEXT: .long 4294965658 @ 0xfffff99a
; CHECK-NEXT: .LCPI4_7:
; CHECK-NEXT: .LCPI4_5:
; CHECK-NEXT: .long 2047 @ 0x7ff
%urem = urem <3 x i11> %X, <i11 6, i11 7, i11 -5>
%cmp = icmp ne <3 x i11> %urem, <i11 0, i11 1, i11 2>