[X86ISelLowering] avoid emitting libcalls to __mulodi4()

Similar to D108842, D108844, and D108926.

__has_builtin(__builtin_mul_overflow) returns true for 32-bit x86
targets, but Clang defers to compiler-rt when encountering long long
types. This breaks ARCH=i386 + CONFIG_BLK_DEV_NBD=y builds of the Linux
kernel that use __builtin_mul_overflow with these types for these
targets.
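
A minimal C reproducer of the pattern, as a sketch (the function name
is made up for illustration):

    #include <stdbool.h>

    /* 64-bit ("long long") signed multiply on a 32-bit x86 target.
     * Before this change, Clang lowered the builtin below to a libcall
     * to __mulodi4() from compiler-rt, which the kernel does not link
     * against. */
    bool mul_check(long long a, long long b, long long *prod)
    {
        return __builtin_mul_overflow(a, b, prod);
    }

Compiled with something like clang --target=i386-linux-gnu -O2 -S, this
previously emitted calll __mulodi4; after this change the multiply and
overflow check are expanded inline.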

If the semantics of __has_builtin mean "the compiler always resolves
these", then we shouldn't conditionally emit a libcall.

This will still need to be worked around in the Linux kernel in order
to continue supporting these builds with older releases of Clang.

Link: https://bugs.llvm.org/show_bug.cgi?id=28629
Link: https://bugs.llvm.org/show_bug.cgi?id=35922
Link: https://github.com/ClangBuiltLinux/linux/issues/1438

Reviewed By: lebedev.ri, RKSimon

Differential Revision: https://reviews.llvm.org/D108928
Nick Desaulniers 2021-09-07 10:26:22 -07:00
parent c9e9635ffe
commit d0eeb64be5
4 changed files with 396 additions and 116 deletions

@@ -2148,6 +2148,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::MULO_I128, nullptr);
}
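
Clearing the libcall names forces SelectionDAG to expand
llvm.smul.with.overflow.i64 inline, which is what the updated CHECK
lines below reflect. In effect, the expansion performs the check
sketched here in C (written with __int128 for brevity; the i386
lowering open-codes the same arithmetic out of 32-bit multiplies, and
the sketch assumes arithmetic right shift on signed types, as on GCC
and Clang):

    #include <stdbool.h>
    #include <stdint.h>

    /* The signed 64x64 multiply overflows iff the high 64 bits of the
     * full 128-bit product are not the sign extension of the low 64
     * bits. */
    bool smulo64(int64_t a, int64_t b, int64_t *lo)
    {
        __int128 p = (__int128)a * (__int128)b;
        *lo = (int64_t)p;
        return (int64_t)(p >> 64) != (*lo >> 63);
    }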

@@ -0,0 +1,19 @@
; RUN: llc %s -mtriple=i386 -o - | FileCheck %s
define i1 @no__mulodi4(i32 %a, i64 %b, i32* %c) {
; CHECK-LABEL: no__mulodi4:
; CHECK-NOT: calll __mulodi4
entry:
%0 = sext i32 %a to i64
%1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %0, i64 %b)
%2 = extractvalue { i64, i1 } %1, 1
%3 = extractvalue { i64, i1 } %1, 0
%4 = trunc i64 %3 to i32
%5 = sext i32 %4 to i64
%6 = icmp ne i64 %3, %5
%7 = or i1 %2, %6
store i32 %4, i32* %c, align 4
ret i1 %7
}
declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)

@@ -365,41 +365,79 @@ define i64 @func5(i64 %x, i64 %y) {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 24
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $0, (%esp)
; X86-NEXT: movl %esp, %edi
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: xorl $2147483647, %ebp # imm = 0x7FFFFFFF
; X86-NEXT: notl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %edx
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %ecx
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll __mulodi4
; X86-NEXT: addl $20, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -20
; X86-NEXT: cmpl $0, (%esp)
; X86-NEXT: cmovnel %ebx, %eax
; X86-NEXT: cmovnel %ebp, %edx
; X86-NEXT: addl $4, %esp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: imull %ebx, %edi
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: addl %edi, %edx
; X86-NEXT: movl %ebp, %edi
; X86-NEXT: imull %ebp, %ebx
; X86-NEXT: addl %edx, %ebx
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: imull %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %esi
; X86-NEXT: addl %ebp, %edx
; X86-NEXT: imull %esi, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebx, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: setb %bl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movzbl %bl, %esi
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %edi, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: xorl %edi, %edx
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-NEXT: orl %edx, %edi
; X86-NEXT: notl %ecx
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: cmovel %ebx, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: addl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16

@@ -208,33 +208,78 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, i64* %res) {
;
; WIN32-LABEL: smuloi64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: subl $8, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl $0, (%esp)
; WIN32-NEXT: movl %esp, %ebx
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %edx
; WIN32-NEXT: pushl %ecx
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: calll ___mulodi4
; WIN32-NEXT: addl $20, %esp
; WIN32-NEXT: cmpl $0, (%esp)
; WIN32-NEXT: setne %cl
; WIN32-NEXT: movl %edx, 4(%esi)
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: imull %ecx, %esi
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %esi, %edx
; WIN32-NEXT: movl %edi, %esi
; WIN32-NEXT: imull %edi, %ecx
; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: movl %esi, %edi
; WIN32-NEXT: imull %ebx, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %ebx
; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: imull %ebx, %esi
; WIN32-NEXT: addl %edx, %esi
; WIN32-NEXT: addl %ebp, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: adcl %ecx, %esi
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: movl %ebx, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %ecx
; WIN32-NEXT: addl %ebx, %ecx
; WIN32-NEXT: adcl $0, %ebp
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: addl %ecx, %edi
; WIN32-NEXT: adcl %ebp, %ebx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %ebx, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: adcl %esi, %edx
; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: xorl %ecx, %edx
; WIN32-NEXT: xorl %eax, %ecx
; WIN32-NEXT: orl %edx, %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %edi, 4(%eax)
; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; WIN32-NEXT: movl %ecx, (%eax)
; WIN32-NEXT: setne %al
; WIN32-NEXT: addl $8, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
%t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
%val = extractvalue {i64, i1} %t, 0
@@ -528,27 +573,68 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl $0, (%esp)
; WIN32-NEXT: movl %esp, %eax
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: calll ___mulodi4
; WIN32-NEXT: addl $20, %esp
; WIN32-NEXT: cmpl $0, (%esp)
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, %ecx
; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: movl %ebp, %edi
; WIN32-NEXT: imull %ecx, %edi
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: imull %ebx, %ecx
; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: sarl $31, %ebx
; WIN32-NEXT: movl %ebx, %edi
; WIN32-NEXT: imull %esi, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: mull %esi
; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: movl %esi, %edi
; WIN32-NEXT: imull %esi, %ebx
; WIN32-NEXT: addl %edx, %ebx
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: adcl %ecx, %ebx
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %edx, %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %ecx
; WIN32-NEXT: addl %esi, %ecx
; WIN32-NEXT: adcl $0, %ebp
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: movl %edx, %esi
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: addl %ecx, %edi
; WIN32-NEXT: adcl %ebp, %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: setb %cl
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %esi, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: adcl %ebx, %edx
; WIN32-NEXT: sarl $31, %edi
; WIN32-NEXT: xorl %edi, %edx
; WIN32-NEXT: xorl %eax, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: orl %edx, %edi
; WIN32-NEXT: jne LBB12_2
; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl %ebx, %esi
; WIN32-NEXT: movl %ebp, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: LBB12_2:
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: movl %edi, %edx
; WIN32-NEXT: movl %ebp, %edx
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
@@ -904,23 +990,66 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
;
; WIN32-LABEL: smulobri64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl $0, (%esp)
; WIN32-NEXT: movl %esp, %edi
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %edx
; WIN32-NEXT: pushl %ecx
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: calll ___mulodi4
; WIN32-NEXT: addl $20, %esp
; WIN32-NEXT: cmpl $0, (%esp)
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: imull %ecx, %esi
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %esi, %edx
; WIN32-NEXT: movl %edi, %esi
; WIN32-NEXT: imull %edi, %ecx
; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: movl %esi, %edi
; WIN32-NEXT: imull %ebx, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %ebx
; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: imull %ebx, %esi
; WIN32-NEXT: addl %edx, %esi
; WIN32-NEXT: addl %ebp, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: adcl %ecx, %esi
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: movl %ebx, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %ecx
; WIN32-NEXT: addl %ebp, %ecx
; WIN32-NEXT: adcl $0, %ebx
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: addl %ecx, %edi
; WIN32-NEXT: adcl %ebx, %ebp
; WIN32-NEXT: setb %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %ebp, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: adcl %esi, %edx
; WIN32-NEXT: sarl $31, %edi
; WIN32-NEXT: xorl %edi, %edx
; WIN32-NEXT: xorl %eax, %edi
; WIN32-NEXT: orl %edx, %edi
; WIN32-NEXT: jne LBB18_1
; WIN32-NEXT: # %bb.3: # %continue
; WIN32-NEXT: movb $1, %al
@@ -928,6 +1057,8 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
; WIN32-NEXT: LBB18_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
@@ -1567,34 +1698,79 @@ define zeroext i1 @smuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
;
; WIN32-LABEL: smuloi64_load:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: subl $16, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl (%eax), %esi
; WIN32-NEXT: movl 4(%eax), %ebp
; WIN32-NEXT: sarl $31, %ebx
; WIN32-NEXT: movl %ebx, %ecx
; WIN32-NEXT: imull %ebp, %ecx
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: mull %esi
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: addl %ecx, %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull %esi, %ebx
; WIN32-NEXT: addl %edx, %ebx
; WIN32-NEXT: movl %ebp, %ecx
; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: imull %ecx, %edi
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: imull %edi, %ecx
; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: adcl %ebx, %ecx
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: addl %ebx, %edi
; WIN32-NEXT: adcl $0, %ebp
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl (%edx), %edi
; WIN32-NEXT: movl 4(%edx), %edx
; WIN32-NEXT: movl $0, (%esp)
; WIN32-NEXT: movl %esp, %ebx
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %ecx
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: pushl %edx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: calll ___mulodi4
; WIN32-NEXT: addl $20, %esp
; WIN32-NEXT: cmpl $0, (%esp)
; WIN32-NEXT: setne %cl
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %edx, 4(%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: mull %edx
; WIN32-NEXT: movl %edx, %esi
; WIN32-NEXT: movl %eax, %ebx
; WIN32-NEXT: addl %edi, %ebx
; WIN32-NEXT: adcl %ebp, %esi
; WIN32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %esi, %eax
; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; WIN32-NEXT: adcl %esi, %edx
; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: movl %ebx, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: xorl %ecx, %edx
; WIN32-NEXT: xorl %eax, %ecx
; WIN32-NEXT: orl %edx, %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %ebx, 4(%eax)
; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; WIN32-NEXT: movl %ecx, (%eax)
; WIN32-NEXT: setne %al
; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
%v1 = load i64, i64* %ptr1
%t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
@@ -1630,34 +1806,80 @@ define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
;
; WIN32-LABEL: smuloi64_load2:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: subl $12, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl (%edx), %edi
; WIN32-NEXT: movl 4(%edx), %edx
; WIN32-NEXT: movl $0, (%esp)
; WIN32-NEXT: movl %esp, %ebx
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %ecx
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: calll ___mulodi4
; WIN32-NEXT: addl $20, %esp
; WIN32-NEXT: cmpl $0, (%esp)
; WIN32-NEXT: setne %cl
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %edx, 4(%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl (%eax), %ebp
; WIN32-NEXT: movl 4(%eax), %edi
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: movl %ebp, %esi
; WIN32-NEXT: imull %ecx, %esi
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: addl %esi, %edx
; WIN32-NEXT: movl %edi, %esi
; WIN32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: imull %edi, %ecx
; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: movl %esi, %edi
; WIN32-NEXT: imull %ebx, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %ebx
; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: imull %ebx, %esi
; WIN32-NEXT: addl %edx, %esi
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: adcl %ecx, %esi
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: movl %ebx, %edi
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %ecx
; WIN32-NEXT: addl %ebx, %ecx
; WIN32-NEXT: adcl $0, %ebp
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: addl %ecx, %edi
; WIN32-NEXT: adcl %ebp, %ebx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; WIN32-NEXT: addl %ebx, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; WIN32-NEXT: adcl %esi, %edx
; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: xorl %ecx, %edx
; WIN32-NEXT: xorl %eax, %ecx
; WIN32-NEXT: orl %edx, %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %edi, 4(%eax)
; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload
; WIN32-NEXT: movl %ecx, (%eax)
; WIN32-NEXT: setne %al
; WIN32-NEXT: addl $12, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
%v2 = load i64, i64* %ptr2
%t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)