forked from OSchip/llvm-project
[DAGCombiner] Allow divide by constant optimization on opaque constants.
Summary:
I believe this restores the behavior we had before r339147.

Fixes PR38622.

Reviewers: RKSimon, chandlerc, spatel

Reviewed By: chandlerc

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D50936

llvm-svn: 340120
This commit is contained in:
parent
526a31a5a4
commit
cc5dbbf759
|
@@ -3224,7 +3224,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
|
|||
// alternate sequence. Targets may check function attributes for size/speed
|
||||
// trade-offs.
|
||||
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
|
||||
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
|
||||
+ if (isConstantOrConstantVector(N1) &&
|
||||
!TLI.isIntDivCheap(N->getValueType(0), Attr))
|
||||
if (SDValue Op = BuildSDIV(N))
|
||||
return Op;
|
||||
|
@@ -3316,7 +3316,7 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
|
|||
|
||||
// fold (udiv x, c) -> alternate
|
||||
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
|
||||
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
|
||||
+ if (isConstantOrConstantVector(N1) &&
|
||||
!TLI.isIntDivCheap(N->getValueType(0), Attr))
|
||||
if (SDValue Op = BuildUDIV(N))
|
||||
return Op;
|
||||
|
|
|
@@ -330,3 +330,81 @@ entry:
|
|||
%div = udiv i64 %rem, 7
|
||||
ret i64 %div
|
||||
}
|
||||
|
||||
define { i64, i32 } @PR38622(i64) {
|
||||
; X32-LABEL: PR38622:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: pushl %ebx
|
||||
; X32-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-NEXT: pushl %edi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 20
|
||||
; X32-NEXT: subl $12, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 32
|
||||
; X32-NEXT: .cfi_offset %esi, -20
|
||||
; X32-NEXT: .cfi_offset %edi, -16
|
||||
; X32-NEXT: .cfi_offset %ebx, -12
|
||||
; X32-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X32-NEXT: pushl $0
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: pushl $-294967296 # imm = 0xEE6B2800
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: pushl %ebx
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: calll __udivdi3
|
||||
; X32-NEXT: addl $16, %esp
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset -16
|
||||
; X32-NEXT: movl %eax, %esi
|
||||
; X32-NEXT: movl %edx, %edi
|
||||
; X32-NEXT: pushl $0
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: pushl $-294967296 # imm = 0xEE6B2800
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: pushl %ebx
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X32-NEXT: calll __umoddi3
|
||||
; X32-NEXT: addl $16, %esp
|
||||
; X32-NEXT: .cfi_adjust_cfa_offset -16
|
||||
; X32-NEXT: movl %eax, %ecx
|
||||
; X32-NEXT: movl %esi, %eax
|
||||
; X32-NEXT: movl %edi, %edx
|
||||
; X32-NEXT: addl $12, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 20
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-NEXT: popl %edi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-NEXT: popl %ebx
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 4
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: PR38622:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shrq $11, %rax
|
||||
; X64-NEXT: movabsq $4835703278458517, %rcx # imm = 0x112E0BE826D695
|
||||
; X64-NEXT: mulq %rcx
|
||||
; X64-NEXT: shrq $9, %rdx
|
||||
; X64-NEXT: imull $-294967296, %edx, %eax # imm = 0xEE6B2800
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: movq %rdx, %rax
|
||||
; X64-NEXT: movl %edi, %edx
|
||||
; X64-NEXT: retq
|
||||
%2 = udiv i64 %0, 4000000000
|
||||
%3 = urem i64 %0, 4000000000
|
||||
%4 = trunc i64 %3 to i32
|
||||
%5 = insertvalue { i64, i32 } undef, i64 %2, 0
|
||||
%6 = insertvalue { i64, i32 } %5, i32 %4, 1
|
||||
ret { i64, i32 } %6
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue