From cc5dbbf759ae10da7378fc831d5d0cdeb11e814d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 18 Aug 2018 05:52:42 +0000
Subject: [PATCH] [DAGCombiner] Allow divide by constant optimization on
 opaque constants.

Summary: I believe this restores the behavior we had before r339147.

Fixes PR38622.

Reviewers: RKSimon, chandlerc, spatel

Reviewed By: chandlerc

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D50936

llvm-svn: 340120
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  4 +-
 llvm/test/CodeGen/X86/divide-by-constant.ll   | 78 +++++++++++++++++++
 2 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 453f506d7980..62ee5c30086d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3224,7 +3224,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   // alternate sequence. Targets may check function attributes for size/speed
   // trade-offs.
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+  if (isConstantOrConstantVector(N1) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildSDIV(N))
       return Op;
@@ -3316,7 +3316,7 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
 
   // fold (udiv x, c) -> alternate
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+  if (isConstantOrConstantVector(N1) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildUDIV(N))
       return Op;
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index cc2dc1b1d094..8f0bb1075bd1 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -330,3 +330,81 @@ entry:
   %div = udiv i64 %rem, 7
   ret i64 %div
 }
+
+define { i64, i32 } @PR38622(i64) {
+; X32-LABEL: PR38622:
+; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    pushl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 20
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 32
+; X32-NEXT:    .cfi_offset %esi, -20
+; X32-NEXT:    .cfi_offset %edi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $-294967296 # imm = 0xEE6B2800
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    calll __udivdi3
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    .cfi_adjust_cfa_offset -16
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    movl %edx, %edi
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $-294967296 # imm = 0xEE6B2800
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    calll __umoddi3
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    .cfi_adjust_cfa_offset -16
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    movl %edi, %edx
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 20
+; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    popl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    popl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    retl
+;
+; X64-LABEL: PR38622:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $11, %rax
+; X64-NEXT:    movabsq $4835703278458517, %rcx # imm = 0x112E0BE826D695
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    shrq $9, %rdx
+; X64-NEXT:    imull $-294967296, %edx, %eax # imm = 0xEE6B2800
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movl %edi, %edx
+; X64-NEXT:    retq
+  %2 = udiv i64 %0, 4000000000
+  %3 = urem i64 %0, 4000000000
+  %4 = trunc i64 %3 to i32
+  %5 = insertvalue { i64, i32 } undef, i64 %2, 0
+  %6 = insertvalue { i64, i32 } %5, i32 %4, 1
+  ret { i64, i32 } %6
+}
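
For reference, the check being relaxed here is DAGCombiner's file-local
helper isConstantOrConstantVector. Below is a minimal sketch of its behavior,
paraphrased from the call sites in this patch rather than copied verbatim
from the tree; the exact upstream body may differ in detail:

  // Sketch (assumed shape, not the exact in-tree source). Returns true if N
  // is a scalar constant or a BUILD_VECTOR of constants. With
  // NoOpaques == true, a ConstantSDNode marked opaque (as ConstantHoisting
  // does to keep expensive immediates from being refolded) fails the check.
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
    // Scalar case: reject opaque constants when asked to.
    if (auto *C = dyn_cast<ConstantSDNode>(N))
      return !(NoOpaques && C->isOpaque());
    // Vector case: every defined element must be a (non-opaque) constant;
    // undef elements are tolerated.
    if (N.getOpcode() != ISD::BUILD_VECTOR)
      return false;
    for (const SDValue &Op : N->op_values()) {
      if (Op.isUndef())
        continue;
      auto *C = dyn_cast<ConstantSDNode>(Op);
      if (!C || (NoOpaques && C->isOpaque()))
        return false;
    }
    return true;
  }

With /*NoOpaques*/ true at these call sites, a divisor that had been marked
opaque (such as the hoisted 4000000000 in PR38622) disqualified the fold, so
BuildSDIV/BuildUDIV were skipped and the division was lowered as an actual
divide. Dropping the flag restores the pre-r339147 behavior; the X64 checks
above show the multiply-by-magic-constant expansion being produced again.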