llvm-project/llvm/test/CodeGen/X86/srem-seteq-optsize.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Only the per-target prefixes are enabled below: this file has no assertions
; shared by both targets, and newer FileCheck versions reject never-used
; prefixes by default.
; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64

; On X86, division is expensive. BuildRemEqFold should therefore run even
; when optimizing for size. Only optimizing for minimum size retains a plain div.

; Minimum-size case: the function carries both `optsize` and `minsize`.
; It returns 42 when %X is divisible by 5 (signed remainder equals zero) and
; -10 otherwise. The expected output for both targets keeps a real `idivl`
; and tests the remainder in %edx, i.e. the remainder-equality fold is not
; applied here. Constants are materialized with push/pop pairs in the
; expected output.
define i32 @test_minsize(i32 %X) optsize minsize nounwind readnone {
; X86-LABEL: test_minsize:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pushl $5
; X86-NEXT:    popl %ecx
; X86-NEXT:    cltd
; X86-NEXT:    idivl %ecx
; X86-NEXT:    testl %edx, %edx
; X86-NEXT:    je .LBB0_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    pushl $-10
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB0_1:
; X86-NEXT:    pushl $42
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_minsize:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pushq $5
; X64-NEXT:    popq %rcx
; X64-NEXT:    cltd
; X64-NEXT:    idivl %ecx
; X64-NEXT:    testl %edx, %edx
; X64-NEXT:    pushq $42
; X64-NEXT:    popq %rcx
; X64-NEXT:    pushq $-10
; X64-NEXT:    popq %rax
; X64-NEXT:    cmovel %ecx, %eax
; X64-NEXT:    retq
  ; Pattern under test: select on ((X srem 5) == 0).
  %rem = srem i32 %X, 5
  %cmp = icmp eq i32 %rem, 0
  %ret = select i1 %cmp, i32 42, i32 -10
  ret i32 %ret
}

; Size-optimized case: the function carries `optsize` but not `minsize`.
; The IR is identical to the minimum-size test (42 when %X is divisible by 5,
; -10 otherwise), but the expected output contains no division. Instead it is
; a multiply by 0xCCCCCCCD (the inverse of 5 modulo 2^32), an add of
; 0x19999999, and an unsigned "below" comparison against 0x33333333.
define i32 @test_optsize(i32 %X) optsize nounwind readnone {
; X86-LABEL: test_optsize:
; X86:       # %bb.0:
; X86-NEXT:    imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT:    addl $429496729, %eax # imm = 0x19999999
; X86-NEXT:    cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT:    movl $42, %eax
; X86-NEXT:    jb .LBB1_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl $-10, %eax
; X86-NEXT:  .LBB1_2:
; X86-NEXT:    retl
;
; X64-LABEL: test_optsize:
; X64:       # %bb.0:
; X64-NEXT:    imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT:    addl $429496729, %eax # imm = 0x19999999
; X64-NEXT:    cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT:    movl $42, %ecx
; X64-NEXT:    movl $-10, %eax
; X64-NEXT:    cmovbl %ecx, %eax
; X64-NEXT:    retq
  ; Pattern under test: select on ((X srem 5) == 0).
  %rem = srem i32 %X, 5
  %cmp = icmp eq i32 %rem, 0
  %ret = select i1 %cmp, i32 42, i32 -10
  ret i32 %ret
}
[NFC][Codegen][X86][AArch64] Add "(x s% C) == 0" tests Much like with `urem`, the same optimization (albeit with slightly different algorithm) applies for the signed case, too. I'm simply copying the test coverage from `urem` case for now, i believe it should be (close to?) sufficient. llvm-svn: 366640 2019-07-21 03:25:44 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -mtriple=i686-unknown-linux-gnu < %s \| FileCheck %s --check-prefixes=CHECK,X86`
			`; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s \| FileCheck %s --check-prefixes=CHECK,X64`

			`; On X86, division is expensive. BuildRemEqFold should therefore run even`
			`; when optimizing for size. Only optimizing for minimum size retains a plain div.`

			`define i32 @test_minsize(i32 %X) optsize minsize nounwind readnone {`
			`; X86-LABEL: test_minsize:`
			`; X86: # %bb.0:`
			`; X86-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; X86-NEXT: pushl $5`
			`; X86-NEXT: popl %ecx`
			`; X86-NEXT: cltd`
			`; X86-NEXT: idivl %ecx`
			`; X86-NEXT: testl %edx, %edx`
			`; X86-NEXT: je .LBB0_1`
			`; X86-NEXT: # %bb.2:`
			`; X86-NEXT: pushl $-10`
			`; X86-NEXT: popl %eax`
			`; X86-NEXT: retl`
			`; X86-NEXT: .LBB0_1:`
			`; X86-NEXT: pushl $42`
			`; X86-NEXT: popl %eax`
			`; X86-NEXT: retl`
			`;`
			`; X64-LABEL: test_minsize:`
			`; X64: # %bb.0:`
			`; X64-NEXT: movl %edi, %eax`
			`; X64-NEXT: pushq $5`
			`; X64-NEXT: popq %rcx`
			`; X64-NEXT: cltd`
			`; X64-NEXT: idivl %ecx`
			`; X64-NEXT: testl %edx, %edx`
			`; X64-NEXT: pushq $42`
			`; X64-NEXT: popq %rcx`
			`; X64-NEXT: pushq $-10`
			`; X64-NEXT: popq %rax`
			`; X64-NEXT: cmovel %ecx, %eax`
			`; X64-NEXT: retq`
			`%rem = srem i32 %X, 5`
			`%cmp = icmp eq i32 %rem, 0`
			`%ret = select i1 %cmp, i32 42, i32 -10`
			`ret i32 %ret`
			`}`

			`define i32 @test_optsize(i32 %X) optsize nounwind readnone {`
			`; X86-LABEL: test_optsize:`
			`; X86: # %bb.0:`
[CodeGen][SelectionDAG] More efficient code for X % C == 0 (SREM case) Summary: This implements an optimization described in Hacker's Delight 10-17: when `C` is constant, the result of `X % C == 0` can be computed more cheaply without actually calculating the remainder. The motivation is discussed here: https://bugs.llvm.org/show_bug.cgi?id=35479. One huge caveat: this signed case is only valid for positive divisors. While we can freely negate negative divisors, we can't negate `INT_MIN`, so for now if `INT_MIN` is encountered, we bailout. As a follow-up, it should be possible to handle that more gracefully via extra `and`+`setcc`+`select`. This passes llvm's test-suite, and from cursory(!) cross-examination the folds (the assembly) match those of GCC, and manual checking via alive did not reveal any issues (other than the `INT_MIN` case) Reviewers: RKSimon, spatel, hermord, craig.topper, xbolva00 Reviewed By: RKSimon, xbolva00 Subscribers: xbolva00, thakis, javed.absar, hiraditya, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65366 llvm-svn: 368702 2019-08-13 22:57:37 +08:00			`; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD`
			`; X86-NEXT: addl $429496729, %eax # imm = 0x19999999`
			`; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333`
[NFC][Codegen][X86][AArch64] Add "(x s% C) == 0" tests Much like with `urem`, the same optimization (albeit with slightly different algorithm) applies for the signed case, too. I'm simply copying the test coverage from `urem` case for now, i believe it should be (close to?) sufficient. llvm-svn: 366640 2019-07-21 03:25:44 +08:00			`; X86-NEXT: movl $42, %eax`
[CodeGen][SelectionDAG] More efficient code for X % C == 0 (SREM case) Summary: This implements an optimization described in Hacker's Delight 10-17: when `C` is constant, the result of `X % C == 0` can be computed more cheaply without actually calculating the remainder. The motivation is discussed here: https://bugs.llvm.org/show_bug.cgi?id=35479. One huge caveat: this signed case is only valid for positive divisors. While we can freely negate negative divisors, we can't negate `INT_MIN`, so for now if `INT_MIN` is encountered, we bailout. As a follow-up, it should be possible to handle that more gracefully via extra `and`+`setcc`+`select`. This passes llvm's test-suite, and from cursory(!) cross-examination the folds (the assembly) match those of GCC, and manual checking via alive did not reveal any issues (other than the `INT_MIN` case) Reviewers: RKSimon, spatel, hermord, craig.topper, xbolva00 Reviewed By: RKSimon, xbolva00 Subscribers: xbolva00, thakis, javed.absar, hiraditya, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65366 llvm-svn: 368702 2019-08-13 22:57:37 +08:00			`; X86-NEXT: jb .LBB1_2`
[NFC][Codegen][X86][AArch64] Add "(x s% C) == 0" tests Much like with `urem`, the same optimization (albeit with slightly different algorithm) applies for the signed case, too. I'm simply copying the test coverage from `urem` case for now, i believe it should be (close to?) sufficient. llvm-svn: 366640 2019-07-21 03:25:44 +08:00			`; X86-NEXT: # %bb.1:`
			`; X86-NEXT: movl $-10, %eax`
			`; X86-NEXT: .LBB1_2:`
			`; X86-NEXT: retl`
			`;`
			`; X64-LABEL: test_optsize:`
			`; X64: # %bb.0:`
[CodeGen][SelectionDAG] More efficient code for X % C == 0 (SREM case) Summary: This implements an optimization described in Hacker's Delight 10-17: when `C` is constant, the result of `X % C == 0` can be computed more cheaply without actually calculating the remainder. The motivation is discussed here: https://bugs.llvm.org/show_bug.cgi?id=35479. One huge caveat: this signed case is only valid for positive divisors. While we can freely negate negative divisors, we can't negate `INT_MIN`, so for now if `INT_MIN` is encountered, we bailout. As a follow-up, it should be possible to handle that more gracefully via extra `and`+`setcc`+`select`. This passes llvm's test-suite, and from cursory(!) cross-examination the folds (the assembly) match those of GCC, and manual checking via alive did not reveal any issues (other than the `INT_MIN` case) Reviewers: RKSimon, spatel, hermord, craig.topper, xbolva00 Reviewed By: RKSimon, xbolva00 Subscribers: xbolva00, thakis, javed.absar, hiraditya, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65366 llvm-svn: 368702 2019-08-13 22:57:37 +08:00			`; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD`
			`; X64-NEXT: addl $429496729, %eax # imm = 0x19999999`
			`; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333`
[NFC][Codegen][X86][AArch64] Add "(x s% C) == 0" tests Much like with `urem`, the same optimization (albeit with slightly different algorithm) applies for the signed case, too. I'm simply copying the test coverage from `urem` case for now, i believe it should be (close to?) sufficient. llvm-svn: 366640 2019-07-21 03:25:44 +08:00			`; X64-NEXT: movl $42, %ecx`
			`; X64-NEXT: movl $-10, %eax`
[CodeGen][SelectionDAG] More efficient code for X % C == 0 (SREM case) Summary: This implements an optimization described in Hacker's Delight 10-17: when `C` is constant, the result of `X % C == 0` can be computed more cheaply without actually calculating the remainder. The motivation is discussed here: https://bugs.llvm.org/show_bug.cgi?id=35479. One huge caveat: this signed case is only valid for positive divisors. While we can freely negate negative divisors, we can't negate `INT_MIN`, so for now if `INT_MIN` is encountered, we bailout. As a follow-up, it should be possible to handle that more gracefully via extra `and`+`setcc`+`select`. This passes llvm's test-suite, and from cursory(!) cross-examination the folds (the assembly) match those of GCC, and manual checking via alive did not reveal any issues (other than the `INT_MIN` case) Reviewers: RKSimon, spatel, hermord, craig.topper, xbolva00 Reviewed By: RKSimon, xbolva00 Subscribers: xbolva00, thakis, javed.absar, hiraditya, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65366 llvm-svn: 368702 2019-08-13 22:57:37 +08:00			`; X64-NEXT: cmovbl %ecx, %eax`
[NFC][Codegen][X86][AArch64] Add "(x s% C) == 0" tests Much like with `urem`, the same optimization (albeit with slightly different algorithm) applies for the signed case, too. I'm simply copying the test coverage from `urem` case for now, i believe it should be (close to?) sufficient. llvm-svn: 366640 2019-07-21 03:25:44 +08:00			`; X64-NEXT: retq`
			`%rem = srem i32 %X, 5`
			`%cmp = icmp eq i32 %rem, 0`
			`%ret = select i1 %cmp, i32 42, i32 -10`
			`ret i32 %ret`
			`}`