llvm-project/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll

380 lines
14 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
; At the moment, BuildUREMEqFold does not handle nonsplat vectors.
; Odd+Even divisors
define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_even:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: adrp x8, .LCPI0_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1]
; CHECK-NEXT: adrp x8, .LCPI0_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI0_2]
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI0_3
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One all-ones divisor in odd divisor
define <4 x i32> @test_urem_odd_allones(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_allones:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: adrp x8, .LCPI1_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
; CHECK-NEXT: adrp x8, .LCPI1_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI1_2]
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One all-ones divisor in even divisor
define <4 x i32> @test_urem_even_allones(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_allones:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: adrp x8, .LCPI2_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_1]
; CHECK-NEXT: adrp x8, .LCPI2_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_2]
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI2_3
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One all-ones divisor in odd+even divisor
define <4 x i32> @test_urem_odd_even_allones(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_even_allones:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: adrp x8, .LCPI3_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: adrp x8, .LCPI3_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI3_3
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One power-of-two divisor in odd divisor
define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: adrp x8, .LCPI4_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One power-of-two divisor in even divisor
define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: adrp x8, .LCPI5_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
; CHECK-NEXT: adrp x8, .LCPI5_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI5_3
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One power-of-two divisor in odd+even divisor
define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_even_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: adrp x8, .LCPI6_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
; CHECK-NEXT: adrp x8, .LCPI6_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_2]
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI6_3
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One all-ones divisor and one power-of-two divisor in odd divisor
define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: adrp x8, .LCPI7_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_1]
; CHECK-NEXT: adrp x8, .LCPI7_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI7_2]
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 16, i32 4294967295, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One all-ones divisor and one power-of-two divisor in even divisor
define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: adrp x8, .LCPI8_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_1]
; CHECK-NEXT: adrp x8, .LCPI8_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI8_2]
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI8_3
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 14, i32 16, i32 4294967295, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One all-ones divisor and one power-of-two divisor in odd+even divisor
define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_even_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI9_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
; CHECK-NEXT: adrp x8, .LCPI9_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
; CHECK-NEXT: adrp x8, .LCPI9_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI9_2]
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 16, i32 4294967295, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
;------------------------------------------------------------------------------;
; Negative tests - the fold is invalid if any divisor is 1.
;------------------------------------------------------------------------------;
; One divisor in odd divisor
define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI10_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
; CHECK-NEXT: adrp x8, .LCPI10_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_1]
; CHECK-NEXT: adrp x8, .LCPI10_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI10_2]
; CHECK-NEXT: adrp x8, .LCPI10_3
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_3]
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 25, i32 25, i32 1, i32 25>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One divisor in even divisors
define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
; CHECK-NEXT: adrp x8, .LCPI11_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_1]
; CHECK-NEXT: adrp x8, .LCPI11_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_2]
; CHECK-NEXT: adrp x8, .LCPI11_3
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI11_3]
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 100, i32 100, i32 1, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
; One divisor in odd-even divisors
define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_even_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: adrp x8, .LCPI12_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_1]
; CHECK-NEXT: adrp x8, .LCPI12_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI12_2]
; CHECK-NEXT: adrp x8, .LCPI12_3
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI12_3]
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 25, i32 100, i32 1, i32 25>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}