; llvm-project/llvm/test/CodeGen/ARM/urem-opt-size.ll

; When optimising for minimum size, we don't want to expand a div to a mul
; and a shift sequence. As a result, a urem instruction, for example, will
; not be expanded to a sequence of umull, lsrs, muls and sub instructions,
; but will instead become just a call to __aeabi_uidivmod.
;
; When the processor features hardware division, UDIV + UREM can be turned
; into UDIV + MLS. This prevents the library function __aeabi_uidivmod from
; being pulled into the binary. The test uses ARMv7-M for this case.
;
;
; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=V7M

; Module-level target description. Both RUN lines above pass -mtriple to llc,
; which overrides the triple recorded here.
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7m-arm-none-eabi"

; Signed division by a constant in a minsize function: the default-prefix run
; must reference the __aeabi_idiv library routine, and no smmul (the
; multiply-high instruction of the expanded sequence) may follow it.
define i32 @foo1() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo1:
; CHECK: __aeabi_idiv
; CHECK-NOT: smmul
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = sdiv i32 %call, 1000000
  ret i32 %div
}

; Unsigned division by a constant in a minsize function: the default-prefix
; run must reference __aeabi_uidiv, and no umull may follow it.
; CHECK-LABEL: foo2:
; CHECK: __aeabi_uidiv
; CHECK-NOT: umull
define i32 @foo2() local_unnamed_addr #0 {
entry:
  %value = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %quot = udiv i32 %value, 1000000
  ret i32 %quot
}

; Unsigned remainder by a constant in a minsize function.
;  - Default prefix (armv7a run): expect __aeabi_uidivmod, with no umull
;    after it.
;  - V7M prefix (thumbv7m run): expect a udiv whose result and operands feed
;    the following mls, with no __aeabi_uidivmod after that.
; CHECK-LABEL: foo3:
; CHECK: __aeabi_uidivmod
; CHECK-NOT: umull
; V7M-LABEL: foo3:
; V7M: udiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_uidivmod
define i32 @foo3() local_unnamed_addr #0 {
entry:
  %value = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %remainder = urem i32 %value, 1000000
  %is.zero = icmp eq i32 %remainder, 0
  %result = zext i1 %is.zero to i32
  ret i32 %result
}

; Test for signed remainder: the default-prefix run must reference the
; __aeabi_idivmod library routine, while the V7M run (hardware divide) must
; produce sdiv followed by an mls that reuses the same registers.
define i32 @foo4() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo4:
; CHECK: __aeabi_idivmod
; V7M-LABEL: foo4:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_idivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %rem = srem i32 %call, 1000000
  ret i32 %rem
}

; Check that doing a sdiv+srem has the same effect as only the srem,
; as the division needs to be computed anyway in order to calculate
; the remainder (i.e. make sure we don't end up with two divisions).
; The "no further sdiv" check appears both between the sdiv and the mls and
; again after the mls, so an extra division placed after the
; multiply-subtract is rejected as well.
define i32 @foo5() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo5:
; CHECK: __aeabi_idivmod
; V7M-LABEL: foo5:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M-NOT: sdiv
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: sdiv
; V7M-NOT: __aeabi_idivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = sdiv i32 %call, 1000000
  %rem = srem i32 %call, 1000000
  %add = add i32 %div, %rem
  ret i32 %add
}

; Regression test for an isel hang. An early version of this patch also
; applied the rewrite to i64, which the hardware divider does not support,
; leaving isel stuck in a loop between type legalization and the
; optimisation. The 64-bit divide must end up as a call to __aeabi_uldivmod.
define i64 @isel_dont_hang(i32 %bar) local_unnamed_addr #4 {
entry:
; CHECK-LABEL: isel_dont_hang:
; CHECK: __aeabi_uldivmod
  %wide = sext i32 %bar to i64
  %dividend = shl i64 %wide, 1
  %divisor = add i64 %wide, 2
  %quot = udiv i64 %dividend, %divisor
  ret i64 %quot
}

; i16 operands are promoted to i32, so a normal udiv instruction is expected
; here, with no __aeabi_ runtime call after it.
define i16 @isel_dont_hang_2(i16 %bar) local_unnamed_addr #4 {
entry:
; CHECK-LABEL: isel_dont_hang_2:
; CHECK: udiv
; CHECK-NOT: __aeabi_
  %dividend = shl i16 %bar, 1
  %divisor = add i16 %bar, 2
  %quot = udiv i16 %dividend, %divisor
  ret i16 %quot
}
; External function that supplies the dividend in foo1-foo5. It is declared
; variadic; the call sites bitcast it to i32 () before calling it.
declare i32 @GetValue(...) local_unnamed_addr

; #0: attribute group for the size-optimised functions (foo1-foo5).
attributes #0 = { minsize nounwind optsize }
; #4: attribute group for the isel_dont_hang tests. It has no minsize/optsize
; and sets per-function "target-cpu"/"target-features", including +hwdiv and
; +hwdiv-arm.
attributes #4 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-jump-tables"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a15" "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4" "use-soft-float"="false" }
Do not expand SDIV when compiling for minimum code size Differential Revision: http://reviews.llvm.org/D22139 llvm-svn: 274855 2016-07-08 23:32:01 +08:00			`; When optimising for minimum size, we don't want to expand a div to a mul`
			`; and a shift sequence. As a result, the urem instruction e.g. will not be`
Code size optimisation: don't expand a div to a mul and and a shift sequence. As a result, the urem instruction will not be expanded to a sequence of umull, lsrs, muls and sub instructions, but just a call to __aeabi_uidivmod. Differential Revision: http://reviews.llvm.org/D22131 llvm-svn: 274843 2016-07-08 20:54:43 +08:00			`; expanded to a sequence of umull, lsrs, muls and sub instructions, but`
			`; just a call to __aeabi_uidivmod.`
			`;`
[ARM] Code size optimisation to lower udiv+urem to udiv+mls instead of a library call to __aeabi_uidivmod. This is an improved implementation of r280808, see also D24133, that got reverted because isel was stuck in a loop. That was caused by the optimisation incorrectly triggering on i64 ints, which shouldn't happen because there is no 64bit hwdiv support; that put isel's type legalization and this optimisation in a loop. A native ARM compiler and testing now shows that this is fixed. Patch mostly by Pablo Barrio. Differential Revision: https://reviews.llvm.org/D25077 llvm-svn: 283098 2016-10-03 18:12:32 +08:00			`; When the processor features hardware division, UDIV + UREM can be turned`
			`; into UDIV + MLS. This prevents the library function __aeabi_uidivmod to be`
			`; pulled into the binary. The test uses ARMv7-M.`
			`;`
Code size optimisation: don't expand a div to a mul and and a shift sequence. As a result, the urem instruction will not be expanded to a sequence of umull, lsrs, muls and sub instructions, but just a call to __aeabi_uidivmod. Differential Revision: http://reviews.llvm.org/D22131 llvm-svn: 274843 2016-07-08 20:54:43 +08:00			`; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - \| FileCheck %s`
[ARM] Code size optimisation to lower udiv+urem to udiv+mls instead of a library call to __aeabi_uidivmod. This is an improved implementation of r280808, see also D24133, that got reverted because isel was stuck in a loop. That was caused by the optimisation incorrectly triggering on i64 ints, which shouldn't happen because there is no 64bit hwdiv support; that put isel's type legalization and this optimisation in a loop. A native ARM compiler and testing now shows that this is fixed. Patch mostly by Pablo Barrio. Differential Revision: https://reviews.llvm.org/D25077 llvm-svn: 283098 2016-10-03 18:12:32 +08:00			`; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs %s -o - \| FileCheck %s -check-prefix=V7M`
Code size optimisation: don't expand a div to a mul and and a shift sequence. As a result, the urem instruction will not be expanded to a sequence of umull, lsrs, muls and sub instructions, but just a call to __aeabi_uidivmod. Differential Revision: http://reviews.llvm.org/D22131 llvm-svn: 274843 2016-07-08 20:54:43 +08:00
			`target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"`
			`target triple = "thumbv7m-arm-none-eabi"`

Do not expand SDIV when compiling for minimum code size Differential Revision: http://reviews.llvm.org/D22139 llvm-svn: 274855 2016-07-08 23:32:01 +08:00			`define i32 @foo1() local_unnamed_addr #0 {`
Code size optimisation: don't expand a div to a mul and and a shift sequence. As a result, the urem instruction will not be expanded to a sequence of umull, lsrs, muls and sub instructions, but just a call to __aeabi_uidivmod. Differential Revision: http://reviews.llvm.org/D22131 llvm-svn: 274843 2016-07-08 20:54:43 +08:00			`entry:`
Do not expand SDIV when compiling for minimum code size Differential Revision: http://reviews.llvm.org/D22139 llvm-svn: 274855 2016-07-08 23:32:01 +08:00			`; CHECK-LABEL: foo1:`
			`; CHECK:__aeabi_idiv`
			`; CHECK-NOT: smmul`
			`%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()`
			`%div = sdiv i32 %call, 1000000`
			`ret i32 %div`
			`}`

			`define i32 @foo2() local_unnamed_addr #0 {`
			`entry:`
			`; CHECK-LABEL: foo2:`
			`; CHECK: __aeabi_uidiv`
			`; CHECK-NOT: umull`
			`%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()`
			`%div = udiv i32 %call, 1000000`
			`ret i32 %div`
			`}`

[ARM] Code size optimisation to lower udiv+urem to udiv+mls instead of a library call to __aeabi_uidivmod. This is an improved implementation of r280808, see also D24133, that got reverted because isel was stuck in a loop. That was caused by the optimisation incorrectly triggering on i64 ints, which shouldn't happen because there is no 64bit hwdiv support; that put isel's type legalization and this optimisation in a loop. A native ARM compiler and testing now shows that this is fixed. Patch mostly by Pablo Barrio. Differential Revision: https://reviews.llvm.org/D25077 llvm-svn: 283098 2016-10-03 18:12:32 +08:00			`; Test for unsigned remainder`
Do not expand SDIV when compiling for minimum code size Differential Revision: http://reviews.llvm.org/D22139 llvm-svn: 274855 2016-07-08 23:32:01 +08:00			`define i32 @foo3() local_unnamed_addr #0 {`
			`entry:`
			`; CHECK-LABEL: foo3:`
Code size optimisation: don't expand a div to a mul and and a shift sequence. As a result, the urem instruction will not be expanded to a sequence of umull, lsrs, muls and sub instructions, but just a call to __aeabi_uidivmod. Differential Revision: http://reviews.llvm.org/D22131 llvm-svn: 274843 2016-07-08 20:54:43 +08:00			`; CHECK: __aeabi_uidivmod`
			`; CHECK-NOT: umull`
[ARM] Code size optimisation to lower udiv+urem to udiv+mls instead of a library call to __aeabi_uidivmod. This is an improved implementation of r280808, see also D24133, that got reverted because isel was stuck in a loop. That was caused by the optimisation incorrectly triggering on i64 ints, which shouldn't happen because there is no 64bit hwdiv support; that put isel's type legalization and this optimisation in a loop. A native ARM compiler and testing now shows that this is fixed. Patch mostly by Pablo Barrio. Differential Revision: https://reviews.llvm.org/D25077 llvm-svn: 283098 2016-10-03 18:12:32 +08:00			`; V7M-LABEL: foo3:`
			`; V7M: udiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]`
			`; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]`
			`; V7M-NOT: __aeabi_uidivmod`
Code size optimisation: don't expand a div to a mul and and a shift sequence. As a result, the urem instruction will not be expanded to a sequence of umull, lsrs, muls and sub instructions, but just a call to __aeabi_uidivmod. Differential Revision: http://reviews.llvm.org/D22131 llvm-svn: 274843 2016-07-08 20:54:43 +08:00			`%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()`
			`%rem = urem i32 %call, 1000000`
			`%cmp = icmp eq i32 %rem, 0`
			`%conv = zext i1 %cmp to i32`
			`ret i32 %conv`
			`}`

[ARM] Code size optimisation to lower udiv+urem to udiv+mls instead of a library call to __aeabi_uidivmod. This is an improved implementation of r280808, see also D24133, that got reverted because isel was stuck in a loop. That was caused by the optimisation incorrectly triggering on i64 ints, which shouldn't happen because there is no 64bit hwdiv support; that put isel's type legalization and this optimisation in a loop. A native ARM compiler and testing now shows that this is fixed. Patch mostly by Pablo Barrio. Differential Revision: https://reviews.llvm.org/D25077 llvm-svn: 283098 2016-10-03 18:12:32 +08:00			`; Test for signed remainder`
			`define i32 @foo4() local_unnamed_addr #0 {`
			`entry:`
			`; CHECK-LABEL: foo4:`
			`; CHECK:__aeabi_idivmod`
			`; V7M-LABEL: foo4:`
			`; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]`
			`; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]`
			`; V7M-NOT: __aeabi_idivmod`
			`%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()`
			`%rem = srem i32 %call, 1000000`
			`ret i32 %rem`
			`}`

			`; Check that doing a sdiv+srem has the same effect as only the srem,`
			`; as the division needs to be computed anyway in order to calculate`
			`; the remainder (i.e. make sure we don't end up with two divisions).`
			`define i32 @foo5() local_unnamed_addr #0 {`
			`entry:`
			`; CHECK-LABEL: foo5:`
			`; CHECK:__aeabi_idivmod`
			`; V7M-LABEL: foo5:`
			`; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]`
			`; V7M-NOT: sdiv`
			`; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]`
			`; V7M-NOT: __aeabi_idivmod`
			`%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()`
			`%div = sdiv i32 %call, 1000000`
			`%rem = srem i32 %call, 1000000`
			`%add = add i32 %div, %rem`
			`ret i32 %add`
			`}`

			`; An early version of this patch caused isel to hang. The reason`
			`; was that it shouldn't do the rewrite for i64 because that's not`
			`; supported by hardware. Isel was stuck in a loop with type`
			`; legalization and this optimisation.`
			`; Function Attrs: norecurse nounwind`
			`define i64 @isel_dont_hang(i32 %bar) local_unnamed_addr #4 {`
			`entry:`
			`; CHECK-LABEL: isel_dont_hang:`
			`; CHECK: __aeabi_uldivmod`
			`%temp.0 = sext i32 %bar to i64`
			`%mul83 = shl i64 %temp.0, 1`
			`%add84 = add i64 %temp.0, 2`
			`%div85 = udiv i64 %mul83, %add84`
			`ret i64 %div85`
			`}`

			`; i16 types are promoted to i32, and we expect a normal udiv here:`
			`define i16 @isel_dont_hang_2(i16 %bar) local_unnamed_addr #4 {`
			`entry:`
			`; CHECK-LABEL: isel_dont_hang_2:`
			`; CHECK: udiv`
			`; CHECK-NOT: __aeabi_`
			`%mul83 = shl i16 %bar, 1`
			`%add84 = add i16 %bar, 2`
			`%div85 = udiv i16 %mul83, %add84`
			`ret i16 %div85`
			`}`
Code size optimisation: don't expand a div to a mul and and a shift sequence. As a result, the urem instruction will not be expanded to a sequence of umull, lsrs, muls and sub instructions, but just a call to __aeabi_uidivmod. Differential Revision: http://reviews.llvm.org/D22131 llvm-svn: 274843 2016-07-08 20:54:43 +08:00			`declare i32 @GetValue(...) local_unnamed_addr`

			`attributes #0 = { minsize nounwind optsize }`
[ARM] Code size optimisation to lower udiv+urem to udiv+mls instead of a library call to __aeabi_uidivmod. This is an improved implementation of r280808, see also D24133, that got reverted because isel was stuck in a loop. That was caused by the optimisation incorrectly triggering on i64 ints, which shouldn't happen because there is no 64bit hwdiv support; that put isel's type legalization and this optimisation in a loop. A native ARM compiler and testing now shows that this is fixed. Patch mostly by Pablo Barrio. Differential Revision: https://reviews.llvm.org/D25077 llvm-svn: 283098 2016-10-03 18:12:32 +08:00			`attributes #4 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-jump-tables"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a15" "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4" "use-soft-float"="false" }`