llvm-project/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

98 lines
3.4 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind {
; CHECK-LABEL: t0_all_tautological:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: adrp x8, .LCPI0_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1]
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 2>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3>
ret <4 x i1> %cmp
}
define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
; CHECK-LABEL: t1_all_odd_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: movi d1, #0xffff0000ffff0000
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 3, i32 1, i32 1, i32 9>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 42, i32 0, i32 42>
ret <4 x i1> %cmp
}
define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
; CHECK-LABEL: t1_all_odd_ne:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: movi d1, #0xffff0000ffff0000
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 3, i32 1, i32 1, i32 9>
%cmp = icmp ne <4 x i32> %urem, <i32 0, i32 42, i32 0, i32 42>
ret <4 x i1> %cmp
}
define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind {
; CHECK-LABEL: t2_narrow:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: dup v2.8h, w8
; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
; CHECK-NEXT: cmhs v0.8h, v1.8h, v0.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: movi d1, #0xffff0000ffff0000
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%urem = urem <8 x i16> %X, <i16 3, i16 1, i16 1, i16 9, i16 3, i16 1, i16 1, i16 9>
%cmp = icmp eq <8 x i16> %urem, <i16 0, i16 0, i16 42, i16 42, i16 0, i16 0, i16 42, i16 42>
ret <8 x i1> %cmp
}
define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-LABEL: t3_wide:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x9, #-6148914691236517206
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: adrp x11, .LCPI4_0
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: movk x9, #43691
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: ldr q0, [x11, :lo12:.LCPI4_0]
; CHECK-NEXT: mul x10, x10, x9
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: fmov d1, x10
; CHECK-NEXT: mov v1.d[1], x8
; CHECK-NEXT: cmhs v0.2d, v0.2d, v1.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479) Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
2019-11-22 20:16:03 +08:00
; CHECK-NEXT: movi d1, #0xffffffff00000000
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%urem = urem <2 x i64> %X, <i64 3, i64 1>
%cmp = icmp eq <2 x i64> %urem, <i64 0, i64 42>
ret <2 x i1> %cmp
}