2017-05-22 10:31:42 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
|
|
|
|
|
2019-11-12 19:11:40 +08:00
|
|
|
; Intrinsic declarations. @llvm.usub.with.overflow.i64 is called by
; sub_U320_usubo and PR39464 further down; @llvm.x86.subborrow.64 has no call
; site in the portion of the file reviewed here.
declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64)
|
|
|
|
declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)
|
2019-11-11 21:07:51 +08:00
|
|
|
|
2018-05-07 00:00:23 +08:00
|
|
|
; sub128: a single `sub i128 %a, %b`. The assertions expect the two 64-bit
; halves to be subtracted with one subq followed by one sbbq, with the low
; half returned in %rax and the high half moved into %rdx.
define i128 @sub128(i128 %a, i128 %b) nounwind {
|
|
|
|
; CHECK-LABEL: sub128:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: subq %rdx, %rax
|
|
|
|
; CHECK-NEXT: sbbq %rcx, %rsi
|
2018-05-07 00:00:23 +08:00
|
|
|
; CHECK-NEXT: movq %rsi, %rdx
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%0 = sub i128 %a, %b
|
|
|
|
ret i128 %0
|
|
|
|
}
|
|
|
|
|
|
|
|
; sub256: a single `sub i256 %a, %b`. The assertions expect one subq followed
; by three sbbq instructions (the sbbq operands are read from the stack), and
; the four 64-bit result words to be stored through the pointer in %rdi, which
; is also copied to %rax.
define i256 @sub256(i256 %a, i256 %b) nounwind {
|
|
|
|
; CHECK-LABEL: sub256:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
2018-05-07 00:00:23 +08:00
|
|
|
; CHECK-NEXT: subq %r9, %rsi
|
|
|
|
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
|
|
|
|
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
|
|
|
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
|
|
|
|
; CHECK-NEXT: movq %rdx, 8(%rdi)
|
|
|
|
; CHECK-NEXT: movq %rsi, (%rdi)
|
|
|
|
; CHECK-NEXT: movq %rcx, 16(%rdi)
|
|
|
|
; CHECK-NEXT: movq %r8, 24(%rdi)
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%0 = sub i256 %a, %b
|
|
|
|
ret i256 %0
|
|
|
|
}
|
|
|
|
|
2017-05-22 10:31:42 +08:00
|
|
|
; %S wraps an array of four i64 words. negate and sub below chain their carry
; from array element 0 up to element 3, i.e. element 0 is the least
; significant word.
%S = type { [4 x i64] }
|
|
|
|
|
|
|
|
; negate: loads the four i64 words of %this, inverts each one (xor with -1),
; and adds them up as zero-extended i128 values, starting with +1 on word 0 and
; feeding bits 64 and up of each sum (lshr 64) into the next word. That is the
; "~x + 1" form of negation, spelled out word by word. The assertions expect it
; to be lowered as 0 - x: zeroed registers, then one subq and three sbbq that
; read the words straight from memory.
define %S @negate(%S* nocapture readonly %this) {
|
|
|
|
; CHECK-LABEL: negate:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: # %bb.0: # %entry
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
2018-06-05 03:23:22 +08:00
|
|
|
; CHECK-NEXT: xorl %r8d, %r8d
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: xorl %edx, %edx
|
|
|
|
; CHECK-NEXT: subq (%rsi), %rdx
|
|
|
|
; CHECK-NEXT: movl $0, %edi
|
|
|
|
; CHECK-NEXT: sbbq 8(%rsi), %rdi
|
|
|
|
; CHECK-NEXT: movl $0, %ecx
|
|
|
|
; CHECK-NEXT: sbbq 16(%rsi), %rcx
|
2018-06-05 03:23:22 +08:00
|
|
|
; CHECK-NEXT: sbbq 24(%rsi), %r8
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movq %rdx, (%rax)
|
|
|
|
; CHECK-NEXT: movq %rdi, 8(%rax)
|
|
|
|
; CHECK-NEXT: movq %rcx, 16(%rax)
|
|
|
|
; CHECK-NEXT: movq %r8, 24(%rax)
|
2017-05-22 10:31:42 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%0 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0
|
|
|
|
%1 = load i64, i64* %0, align 8
|
|
|
|
%2 = xor i64 %1, -1
|
|
|
|
%3 = zext i64 %2 to i128
|
|
|
|
%4 = add nuw nsw i128 %3, 1
|
|
|
|
%5 = trunc i128 %4 to i64
|
|
|
|
%6 = lshr i128 %4, 64
|
|
|
|
%7 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 1
|
|
|
|
%8 = load i64, i64* %7, align 8
|
|
|
|
%9 = xor i64 %8, -1
|
|
|
|
%10 = zext i64 %9 to i128
|
|
|
|
%11 = add nuw nsw i128 %6, %10
|
|
|
|
%12 = trunc i128 %11 to i64
|
|
|
|
%13 = lshr i128 %11, 64
|
|
|
|
%14 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 2
|
|
|
|
%15 = load i64, i64* %14, align 8
|
|
|
|
%16 = xor i64 %15, -1
|
|
|
|
%17 = zext i64 %16 to i128
|
|
|
|
%18 = add nuw nsw i128 %13, %17
|
|
|
|
%19 = lshr i128 %18, 64
|
|
|
|
%20 = trunc i128 %18 to i64
|
|
|
|
%21 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 3
|
|
|
|
%22 = load i64, i64* %21, align 8
|
|
|
|
%23 = xor i64 %22, -1
|
|
|
|
%24 = zext i64 %23 to i128
|
|
|
|
%25 = add nuw nsw i128 %19, %24
|
|
|
|
%26 = trunc i128 %25 to i64
|
|
|
|
%27 = insertvalue [4 x i64] undef, i64 %5, 0
|
|
|
|
%28 = insertvalue [4 x i64] %27, i64 %12, 1
|
|
|
|
%29 = insertvalue [4 x i64] %28, i64 %20, 2
|
|
|
|
%30 = insertvalue [4 x i64] %29, i64 %26, 3
|
|
|
|
%31 = insertvalue %S undef, [4 x i64] %30, 0
|
|
|
|
ret %S %31
|
|
|
|
}
|
2017-05-22 14:06:45 +08:00
|
|
|
|
2019-11-11 21:07:51 +08:00
|
|
|
; sub: computes %this - %arg.b over four i64 words, written as
; "this + ~b + 1": word 0 adds 1 and the inverted word of %arg.b to the loaded
; word, and each later word adds the previous sum's bits 64 and up (lshr 64),
; the loaded word and the inverted word of %arg.b, all as zero-extended i128.
; The assertions record that only word 0 becomes a plain subq; the remaining
; words are still expected as notq/addq/adcq with setb/movzbl carry
; materialisation rather than an sbbq chain.
define %S @sub(%S* nocapture readonly %this, %S %arg.b) {
|
2017-05-22 14:06:45 +08:00
|
|
|
; CHECK-LABEL: sub:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: # %bb.0: # %entry
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
2019-07-04 00:15:59 +08:00
|
|
|
; CHECK-NEXT: movq (%rsi), %r10
|
|
|
|
; CHECK-NEXT: movq 8(%rsi), %rdi
|
2019-07-16 23:17:00 +08:00
|
|
|
; CHECK-NEXT: subq %rdx, %r10
|
|
|
|
; CHECK-NEXT: setae %dl
|
|
|
|
; CHECK-NEXT: addb $-1, %dl
|
2019-07-04 00:15:59 +08:00
|
|
|
; CHECK-NEXT: adcq $0, %rdi
|
|
|
|
; CHECK-NEXT: setb %dl
|
2019-07-16 23:17:00 +08:00
|
|
|
; CHECK-NEXT: movzbl %dl, %r11d
|
2017-05-22 14:06:45 +08:00
|
|
|
; CHECK-NEXT: notq %rcx
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: addq %rdi, %rcx
|
2019-07-16 23:17:00 +08:00
|
|
|
; CHECK-NEXT: adcq 16(%rsi), %r11
|
|
|
|
; CHECK-NEXT: setb %dl
|
|
|
|
; CHECK-NEXT: movzbl %dl, %edx
|
2017-05-22 14:06:45 +08:00
|
|
|
; CHECK-NEXT: notq %r8
|
2019-07-16 23:17:00 +08:00
|
|
|
; CHECK-NEXT: addq %r11, %r8
|
|
|
|
; CHECK-NEXT: adcq 24(%rsi), %rdx
|
2017-05-22 14:06:45 +08:00
|
|
|
; CHECK-NEXT: notq %r9
|
2019-07-16 23:17:00 +08:00
|
|
|
; CHECK-NEXT: addq %rdx, %r9
|
|
|
|
; CHECK-NEXT: movq %r10, (%rax)
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movq %rcx, 8(%rax)
|
|
|
|
; CHECK-NEXT: movq %r8, 16(%rax)
|
|
|
|
; CHECK-NEXT: movq %r9, 24(%rax)
|
2017-05-22 14:06:45 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%0 = extractvalue %S %arg.b, 0
|
|
|
|
%.elt6 = extractvalue [4 x i64] %0, 1
|
|
|
|
%.elt8 = extractvalue [4 x i64] %0, 2
|
|
|
|
%.elt10 = extractvalue [4 x i64] %0, 3
|
|
|
|
%.elt = extractvalue [4 x i64] %0, 0
|
|
|
|
%1 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0
|
|
|
|
%2 = load i64, i64* %1, align 8
|
|
|
|
%3 = zext i64 %2 to i128
|
|
|
|
%4 = add nuw nsw i128 %3, 1
|
|
|
|
%5 = xor i64 %.elt, -1
|
|
|
|
%6 = zext i64 %5 to i128
|
|
|
|
%7 = add nuw nsw i128 %4, %6
|
|
|
|
%8 = trunc i128 %7 to i64
|
|
|
|
%9 = lshr i128 %7, 64
|
|
|
|
%10 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 1
|
|
|
|
%11 = load i64, i64* %10, align 8
|
|
|
|
%12 = zext i64 %11 to i128
|
|
|
|
%13 = add nuw nsw i128 %9, %12
|
|
|
|
%14 = xor i64 %.elt6, -1
|
|
|
|
%15 = zext i64 %14 to i128
|
|
|
|
%16 = add nuw nsw i128 %13, %15
|
|
|
|
%17 = trunc i128 %16 to i64
|
|
|
|
%18 = lshr i128 %16, 64
|
|
|
|
%19 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 2
|
|
|
|
%20 = load i64, i64* %19, align 8
|
|
|
|
%21 = zext i64 %20 to i128
|
|
|
|
%22 = add nuw nsw i128 %18, %21
|
|
|
|
%23 = xor i64 %.elt8, -1
|
|
|
|
%24 = zext i64 %23 to i128
|
|
|
|
%25 = add nuw nsw i128 %22, %24
|
|
|
|
%26 = lshr i128 %25, 64
|
|
|
|
%27 = trunc i128 %25 to i64
|
|
|
|
%28 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 3
|
|
|
|
%29 = load i64, i64* %28, align 8
|
|
|
|
%30 = zext i64 %29 to i128
|
|
|
|
%31 = add nuw nsw i128 %26, %30
|
|
|
|
%32 = xor i64 %.elt10, -1
|
|
|
|
%33 = zext i64 %32 to i128
|
|
|
|
%34 = add nuw nsw i128 %31, %33
|
|
|
|
%35 = trunc i128 %34 to i64
|
|
|
|
%36 = insertvalue [4 x i64] undef, i64 %8, 0
|
|
|
|
%37 = insertvalue [4 x i64] %36, i64 %17, 1
|
|
|
|
%38 = insertvalue [4 x i64] %37, i64 %27, 2
|
|
|
|
%39 = insertvalue [4 x i64] %38, i64 %35, 3
|
|
|
|
%40 = insertvalue %S undef, [4 x i64] %39, 0
|
|
|
|
ret %S %40
|
|
|
|
}
|
2019-09-19 04:48:05 +08:00
|
|
|
|
|
|
|
; Overflow intrinsics declared without a type suffix. sub_from_carry below
; calls @llvm.uadd.with.overflow; the unsuffixed @llvm.usub.with.overflow has
; no call site in the portion of the file reviewed here.
; NOTE(review): a suffixed @llvm.usub.with.overflow.i64 is declared separately
; near the top of the file -- confirm the unsuffixed spelling here is
; intentional and accepted by the IR reader in use.
declare {i64, i1} @llvm.uadd.with.overflow(i64, i64)
|
|
|
|
declare {i64, i1} @llvm.usub.with.overflow(i64, i64)
|
|
|
|
|
|
|
|
; sub_from_carry: adds %x and %y with the uadd.with.overflow intrinsic, stores
; the sum to %valout, zero-extends the overflow bit and returns
; (overflow bit) - %z. The assertions expect %z to be negated first (negq) and
; the carry from the addq to be folded in with `adcq $0`, so the carry is
; consumed directly from the flags.
define i64 @sub_from_carry(i64 %x, i64 %y, i64* %valout, i64 %z) {
|
|
|
|
; CHECK-LABEL: sub_from_carry:
|
|
|
|
; CHECK: # %bb.0:
|
[DAGCombine][ARM][X86] (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) fold
Summary:
`DAGCombiner::visitADDLikeCommutative()` already has a sibling fold:
`(add X, Carry) -> (addcarry X, 0, Carry)`
This fold, as suggested by @efriedma, helps recover from //some//
of the regressions of D62266
Reviewers: efriedma, deadalnix
Subscribers: javed.absar, kristof.beyls, llvm-commits, efriedma
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62392
llvm-svn: 372259
2019-09-19 04:48:27 +08:00
|
|
|
; CHECK-NEXT: movq %rcx, %rax
|
|
|
|
; CHECK-NEXT: negq %rax
|
2019-09-19 04:48:05 +08:00
|
|
|
; CHECK-NEXT: addq %rsi, %rdi
|
|
|
|
; CHECK-NEXT: movq %rdi, (%rdx)
|
[DAGCombine][ARM][X86] (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) fold
Summary:
`DAGCombiner::visitADDLikeCommutative()` already has a sibling fold:
`(add X, Carry) -> (addcarry X, 0, Carry)`
This fold, as suggested by @efriedma, helps recover from //some//
of the regressions of D62266
Reviewers: efriedma, deadalnix
Subscribers: javed.absar, kristof.beyls, llvm-commits, efriedma
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62392
llvm-svn: 372259
2019-09-19 04:48:27 +08:00
|
|
|
; CHECK-NEXT: adcq $0, %rax
|
2019-09-19 04:48:05 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
%agg = call {i64, i1} @llvm.uadd.with.overflow(i64 %x, i64 %y)
|
|
|
|
%val = extractvalue {i64, i1} %agg, 0
|
|
|
|
%ov = extractvalue {i64, i1} %agg, 1
|
|
|
|
store i64 %val, i64* %valout, align 4
|
|
|
|
%carry = zext i1 %ov to i64
|
|
|
|
%res = sub i64 %carry, %z
|
|
|
|
ret i64 %res
|
|
|
|
}
|
2019-11-11 21:07:51 +08:00
|
|
|
|
|
|
|
; %struct.U320 wraps five i64 words (5 x 64 = 320 bits); the two sub_U320_*
; functions below chain their borrow from element 0 up to element 4.
%struct.U320 = type { [5 x i64] }
|
|
|
|
|
|
|
|
; sub_U320_without_i128_or: in-place five-word subtraction on the
; %struct.U320 pointed to by argument %0, using only i64 arithmetic. Each word
; computes `word - operand - borrow_in`; the borrow out is the OR of two
; `icmp ult` tests (word < operand, and the partial difference < borrow_in).
; The final borrow is returned zero-extended to i32. The assertions expect all
; of this to become one subq plus four sbbq applied directly to memory, then
; setb/movzbl for the return value.
define i32 @sub_U320_without_i128_or(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
|
|
|
|
; CHECK-LABEL: sub_U320_without_i128_or:
|
|
|
|
; CHECK: # %bb.0:
|
|
|
|
; CHECK-NEXT: subq %rsi, (%rdi)
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: sbbq %rdx, 8(%rdi)
|
|
|
|
; CHECK-NEXT: sbbq %rcx, 16(%rdi)
|
|
|
|
; CHECK-NEXT: sbbq %r8, 24(%rdi)
|
|
|
|
; CHECK-NEXT: sbbq %r9, 32(%rdi)
|
2019-11-11 21:07:51 +08:00
|
|
|
; CHECK-NEXT: setb %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
%7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
|
|
|
|
%8 = load i64, i64* %7, align 8
|
|
|
|
%9 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 1
|
|
|
|
%10 = load i64, i64* %9, align 8
|
|
|
|
%11 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 2
|
|
|
|
%12 = load i64, i64* %11, align 8
|
|
|
|
%13 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 3
|
|
|
|
%14 = load i64, i64* %13, align 8
|
|
|
|
%15 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 4
|
|
|
|
%16 = load i64, i64* %15, align 8
|
|
|
|
%17 = sub i64 %8, %1
|
|
|
|
%18 = sub i64 %10, %2
|
|
|
|
%19 = icmp ult i64 %8, %1
|
|
|
|
%20 = zext i1 %19 to i64
|
|
|
|
%21 = sub i64 %18, %20
|
|
|
|
%22 = sub i64 %12, %3
|
|
|
|
%23 = icmp ult i64 %10, %2
|
|
|
|
%24 = icmp ult i64 %18, %20
|
|
|
|
%25 = or i1 %23, %24
|
|
|
|
%26 = zext i1 %25 to i64
|
|
|
|
%27 = sub i64 %22, %26
|
|
|
|
%28 = sub i64 %14, %4
|
|
|
|
%29 = icmp ult i64 %12, %3
|
|
|
|
%30 = icmp ult i64 %22, %26
|
|
|
|
%31 = or i1 %29, %30
|
|
|
|
%32 = zext i1 %31 to i64
|
|
|
|
%33 = sub i64 %28, %32
|
|
|
|
%34 = sub i64 %16, %5
|
|
|
|
%35 = icmp ult i64 %14, %4
|
|
|
|
%36 = icmp ult i64 %28, %32
|
|
|
|
%37 = or i1 %35, %36
|
|
|
|
%38 = zext i1 %37 to i64
|
|
|
|
%39 = sub i64 %34, %38
|
|
|
|
store i64 %17, i64* %7, align 8
|
|
|
|
store i64 %21, i64* %9, align 8
|
|
|
|
store i64 %27, i64* %11, align 8
|
|
|
|
store i64 %33, i64* %13, align 8
|
|
|
|
store i64 %39, i64* %15, align 8
|
|
|
|
%40 = icmp ult i64 %16, %5
|
|
|
|
%41 = icmp ult i64 %34, %38
|
|
|
|
%42 = or i1 %40, %41
|
|
|
|
%43 = zext i1 %42 to i32
|
|
|
|
ret i32 %43
|
|
|
|
}
|
|
|
|
|
|
|
|
; sub_U320_usubo: the same in-place five-word subtraction as
; sub_U320_without_i128_or, but each word is built from two
; @llvm.usub.with.overflow.i64 calls (word - operand, then minus the incoming
; borrow) whose overflow bits are merged with `or`. The final merged overflow
; bit is returned zero-extended to i32. The assertions expect the same
; subq + four sbbq + setb/movzbl sequence.
define i32 @sub_U320_usubo(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
|
|
|
|
; CHECK-LABEL: sub_U320_usubo:
|
|
|
|
; CHECK: # %bb.0:
|
|
|
|
; CHECK-NEXT: subq %rsi, (%rdi)
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: sbbq %rdx, 8(%rdi)
|
|
|
|
; CHECK-NEXT: sbbq %rcx, 16(%rdi)
|
|
|
|
; CHECK-NEXT: sbbq %r8, 24(%rdi)
|
|
|
|
; CHECK-NEXT: sbbq %r9, 32(%rdi)
|
2019-11-11 21:07:51 +08:00
|
|
|
; CHECK-NEXT: setb %al
|
|
|
|
; CHECK-NEXT: movzbl %al, %eax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
%7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
|
|
|
|
%8 = load i64, i64* %7, align 8
|
|
|
|
%9 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 1
|
|
|
|
%10 = load i64, i64* %9, align 8
|
|
|
|
%11 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 2
|
|
|
|
%12 = load i64, i64* %11, align 8
|
|
|
|
%13 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 3
|
|
|
|
%14 = load i64, i64* %13, align 8
|
|
|
|
%15 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 4
|
|
|
|
%16 = load i64, i64* %15, align 8
|
|
|
|
%17 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %8, i64 %1)
|
|
|
|
%18 = extractvalue { i64, i1 } %17, 1
|
|
|
|
%19 = extractvalue { i64, i1 } %17, 0
|
|
|
|
%20 = zext i1 %18 to i64
|
|
|
|
%21 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %10, i64 %2)
|
|
|
|
%22 = extractvalue { i64, i1 } %21, 1
|
|
|
|
%23 = extractvalue { i64, i1 } %21, 0
|
|
|
|
%24 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %23, i64 %20)
|
|
|
|
%25 = extractvalue { i64, i1 } %24, 1
|
|
|
|
%26 = extractvalue { i64, i1 } %24, 0
|
|
|
|
%27 = or i1 %22, %25
|
|
|
|
%28 = zext i1 %27 to i64
|
|
|
|
%29 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %12, i64 %3)
|
|
|
|
%30 = extractvalue { i64, i1 } %29, 1
|
|
|
|
%31 = extractvalue { i64, i1 } %29, 0
|
|
|
|
%32 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %31, i64 %28)
|
|
|
|
%33 = extractvalue { i64, i1 } %32, 1
|
|
|
|
%34 = extractvalue { i64, i1 } %32, 0
|
|
|
|
%35 = or i1 %30, %33
|
|
|
|
%36 = zext i1 %35 to i64
|
|
|
|
%37 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %14, i64 %4)
|
|
|
|
%38 = extractvalue { i64, i1 } %37, 1
|
|
|
|
%39 = extractvalue { i64, i1 } %37, 0
|
|
|
|
%40 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %39, i64 %36)
|
|
|
|
%41 = extractvalue { i64, i1 } %40, 1
|
|
|
|
%42 = extractvalue { i64, i1 } %40, 0
|
|
|
|
%43 = or i1 %38, %41
|
|
|
|
%44 = zext i1 %43 to i64
|
|
|
|
%45 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %16, i64 %5)
|
|
|
|
%46 = extractvalue { i64, i1 } %45, 1
|
|
|
|
%47 = extractvalue { i64, i1 } %45, 0
|
|
|
|
%48 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %47, i64 %44)
|
|
|
|
%49 = extractvalue { i64, i1 } %48, 1
|
|
|
|
%50 = extractvalue { i64, i1 } %48, 0
|
|
|
|
%51 = or i1 %46, %49
|
|
|
|
store i64 %19, i64* %7, align 8
|
|
|
|
store i64 %26, i64* %9, align 8
|
|
|
|
store i64 %34, i64* %11, align 8
|
|
|
|
store i64 %42, i64* %13, align 8
|
|
|
|
store i64 %50, i64* %15, align 8
|
|
|
|
%52 = zext i1 %51 to i32
|
|
|
|
ret i32 %52
|
|
|
|
}
|
2019-11-12 19:11:40 +08:00
|
|
|
|
|
|
|
; %struct.U192 wraps three i64 words (3 x 64 = 192 bits); PR39464 below
; subtracts two of them word by word, starting at element 0.
%struct.U192 = type { [3 x i64] }
|
|
|
|
|
|
|
|
; PR39464: three-word subtraction of the %struct.U192 at argument %2 from the
; one at argument %1, written to the sret pointer %0. Words 0 and 1 use
; @llvm.usub.with.overflow.i64 (word 1 uses two calls whose overflow bits are
; merged with `or`); word 2 uses two plain `sub` instructions because its
; borrow out is not needed. The assertions expect a subq/sbbq/sbbq chain with
; each result word stored as soon as it is produced.
define void @PR39464(%struct.U192* noalias nocapture sret %0, %struct.U192* nocapture readonly dereferenceable(24) %1, %struct.U192* nocapture readonly dereferenceable(24) %2) {
|
|
|
|
; CHECK-LABEL: PR39464:
|
|
|
|
; CHECK: # %bb.0:
|
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
|
|
; CHECK-NEXT: movq (%rsi), %rcx
|
|
|
|
; CHECK-NEXT: subq (%rdx), %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, (%rdi)
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: movq 8(%rsi), %rcx
|
|
|
|
; CHECK-NEXT: sbbq 8(%rdx), %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, 8(%rdi)
|
|
|
|
; CHECK-NEXT: movq 16(%rsi), %rcx
|
|
|
|
; CHECK-NEXT: sbbq 16(%rdx), %rcx
|
|
|
|
; CHECK-NEXT: movq %rcx, 16(%rdi)
|
2019-11-12 19:11:40 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
%4 = getelementptr inbounds %struct.U192, %struct.U192* %1, i64 0, i32 0, i64 0
|
|
|
|
%5 = load i64, i64* %4, align 8
|
|
|
|
%6 = getelementptr inbounds %struct.U192, %struct.U192* %2, i64 0, i32 0, i64 0
|
|
|
|
%7 = load i64, i64* %6, align 8
|
|
|
|
%8 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %5, i64 %7)
|
|
|
|
%9 = extractvalue { i64, i1 } %8, 1
|
|
|
|
%10 = extractvalue { i64, i1 } %8, 0
|
|
|
|
%11 = zext i1 %9 to i64
|
|
|
|
%12 = getelementptr inbounds %struct.U192, %struct.U192* %0, i64 0, i32 0, i64 0
|
|
|
|
store i64 %10, i64* %12, align 8
|
|
|
|
%13 = getelementptr inbounds %struct.U192, %struct.U192* %1, i64 0, i32 0, i64 1
|
|
|
|
%14 = load i64, i64* %13, align 8
|
|
|
|
%15 = getelementptr inbounds %struct.U192, %struct.U192* %2, i64 0, i32 0, i64 1
|
|
|
|
%16 = load i64, i64* %15, align 8
|
|
|
|
%17 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %14, i64 %16)
|
|
|
|
%18 = extractvalue { i64, i1 } %17, 1
|
|
|
|
%19 = extractvalue { i64, i1 } %17, 0
|
|
|
|
%20 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %19, i64 %11)
|
|
|
|
%21 = extractvalue { i64, i1 } %20, 1
|
|
|
|
%22 = extractvalue { i64, i1 } %20, 0
|
|
|
|
%23 = or i1 %18, %21
|
|
|
|
%24 = zext i1 %23 to i64
|
|
|
|
%25 = getelementptr inbounds %struct.U192, %struct.U192* %0, i64 0, i32 0, i64 1
|
|
|
|
store i64 %22, i64* %25, align 8
|
|
|
|
%26 = getelementptr inbounds %struct.U192, %struct.U192* %1, i64 0, i32 0, i64 2
|
|
|
|
%27 = load i64, i64* %26, align 8
|
|
|
|
%28 = getelementptr inbounds %struct.U192, %struct.U192* %2, i64 0, i32 0, i64 2
|
|
|
|
%29 = load i64, i64* %28, align 8
|
|
|
|
%30 = sub i64 %27, %29
|
|
|
|
%31 = sub i64 %30, %24
|
|
|
|
%32 = getelementptr inbounds %struct.U192, %struct.U192* %0, i64 0, i32 0, i64 2
|
|
|
|
store i64 %31, i64* %32, align 8
|
|
|
|
ret void
|
|
|
|
}
|
2019-11-18 18:59:48 +08:00
|
|
|
|
|
|
|
; %uint128 is a pair of i64 fields and %uint256 is a pair of %uint128.
; sub_U256_without_i128_or_recursive below chains its borrow from field 0 of
; the first %uint128 up to field 1 of the second.
%uint128 = type { i64, i64 }
|
|
|
|
%uint256 = type { %uint128, %uint128 }
|
|
|
|
|
|
|
|
; The 256-bit subtraction implementation using two inlined usubo procedures for U128 type { i64, i64 }.
|
|
|
|
; This is similar to how LLVM legalizes types in CodeGen.
|
|
|
|
; sub_U256_without_i128_or_recursive: subtracts the %uint256 at argument %2
; from the one at argument %1 and stores the result through the sret pointer
; %0. Each %uint128 half is subtracted as two i64 words with an `icmp ult`
; borrow between them; the borrow out of the low half (the `or` of two
; compares) is then subtracted from the high half, with one more `icmp ult`
; borrow between its two words. The assertions record that only the low half
; becomes a subq/sbbq pair: its borrow out is still materialised with
; setb/movzbl, and the high half is expected as separate subq instructions
; followed by a second subq/sbbq pair.
define void @sub_U256_without_i128_or_recursive(%uint256* sret %0, %uint256* %1, %uint256* %2) nounwind {
|
|
|
|
; CHECK-LABEL: sub_U256_without_i128_or_recursive:
|
|
|
|
; CHECK: # %bb.0:
|
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: movq (%rsi), %r9
|
2019-11-18 18:59:48 +08:00
|
|
|
; CHECK-NEXT: movq 8(%rsi), %r10
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: subq (%rdx), %r9
|
|
|
|
; CHECK-NEXT: sbbq 8(%rdx), %r10
|
|
|
|
; CHECK-NEXT: setb %r8b
|
|
|
|
; CHECK-NEXT: movq 16(%rsi), %rcx
|
2019-11-18 18:59:48 +08:00
|
|
|
; CHECK-NEXT: movq 24(%rsi), %rsi
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: xorl %edi, %edi
|
|
|
|
; CHECK-NEXT: subq 16(%rdx), %rcx
|
|
|
|
; CHECK-NEXT: setb %dil
|
2019-11-18 18:59:48 +08:00
|
|
|
; CHECK-NEXT: subq 24(%rdx), %rsi
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: movzbl %r8b, %edx
|
|
|
|
; CHECK-NEXT: subq %rdx, %rcx
|
|
|
|
; CHECK-NEXT: sbbq %rdi, %rsi
|
|
|
|
; CHECK-NEXT: movq %r9, (%rax)
|
2019-11-18 18:59:48 +08:00
|
|
|
; CHECK-NEXT: movq %r10, 8(%rax)
|
[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if the carry flags of the first two uaddo are merged via OR or XOR.
Work remaining: match ADD, etc.
Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast
Reviewed By: lebedev.ri
Subscribers: chfast, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70079
2019-11-20 21:52:24 +08:00
|
|
|
; CHECK-NEXT: movq %rcx, 16(%rax)
|
2019-11-18 18:59:48 +08:00
|
|
|
; CHECK-NEXT: movq %rsi, 24(%rax)
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
%4 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 0, i32 0
|
|
|
|
%5 = load i64, i64* %4, align 8
|
|
|
|
%6 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 0, i32 1
|
|
|
|
%7 = load i64, i64* %6, align 8
|
|
|
|
%8 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 0, i32 0
|
|
|
|
%9 = load i64, i64* %8, align 8
|
|
|
|
%10 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 0, i32 1
|
|
|
|
%11 = load i64, i64* %10, align 8
|
|
|
|
%12 = sub i64 %5, %9
|
|
|
|
%13 = icmp ult i64 %5, %9
|
|
|
|
%14 = sub i64 %7, %11
|
|
|
|
%15 = icmp ult i64 %7, %11
|
|
|
|
%16 = zext i1 %13 to i64
|
|
|
|
%17 = sub i64 %14, %16
|
|
|
|
%18 = icmp ult i64 %14, %16
|
|
|
|
%19 = or i1 %15, %18
|
|
|
|
%20 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 1, i32 0
|
|
|
|
%21 = load i64, i64* %20, align 8
|
|
|
|
%22 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 1, i32 1
|
|
|
|
%23 = load i64, i64* %22, align 8
|
|
|
|
%24 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 1, i32 0
|
|
|
|
%25 = load i64, i64* %24, align 8
|
|
|
|
%26 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 1, i32 1
|
|
|
|
%27 = load i64, i64* %26, align 8
|
|
|
|
%28 = sub i64 %21, %25
|
|
|
|
%29 = icmp ult i64 %21, %25
|
|
|
|
%30 = sub i64 %23, %27
|
|
|
|
%31 = zext i1 %29 to i64
|
|
|
|
%32 = sub i64 %30, %31
|
|
|
|
%33 = zext i1 %19 to i64
|
|
|
|
%34 = sub i64 %28, %33
|
|
|
|
%35 = icmp ult i64 %28, %33
|
|
|
|
%36 = zext i1 %35 to i64
|
|
|
|
%37 = sub i64 %32, %36
|
|
|
|
%38 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 0, i32 0
|
|
|
|
store i64 %12, i64* %38, align 8
|
|
|
|
%39 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 0, i32 1
|
|
|
|
store i64 %17, i64* %39, align 8
|
|
|
|
%40 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 1, i32 0
|
|
|
|
store i64 %34, i64* %40, align 8
|
|
|
|
%41 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 1, i32 1
|
|
|
|
store i64 %37, i64* %41, align 8
|
|
|
|
ret void
|
|
|
|
}
|