forked from OSchip/llvm-project
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` (PR35479)
Summary: The current lowering is: ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 However, we can support non-tautological cases `C1 u> C2` too. Said handling consists of two parts: * `C2 u<= (-1 %u C1)`. It just works. We only have to change `(X % C1) == C2` into `((X - C2) % C1) == 0` ``` Name: (X % C1) == C2 -> (X - C2) * C3 <= C4 iff C2 u<= (-1 %u C1) Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u<= (-1 %u C1) %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = (-1 /u C1) %n0 = sub i8 %x, C2 %n1 = mul i8 %n0, C3 %n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right %n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n4 = or i8 %n2, %n3 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n4, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/m4P https://rise4fun.com/Alive/SKrx * `C2 u> (-1 %u C1)`. 
We also have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`, and we have to decrement C4: ``` Name: (X % C1) == C2 -> (X - C2) * C3 <= C4 iff C2 u> (-1 %u C1) Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u> (-1 %u C1) %zz = and i8 C3, 0 ; trick alive into making C3 available in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = (-1 /u C1)-1 %n0 = sub i8 %x, C2 %n1 = mul i8 %n0, C3 %n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right %n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n4 = or i8 %n2, %n3 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n4, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/d40 https://rise4fun.com/Alive/8cF I believe this concludes `x u% C1 ==/!= C2` lowering. In fact, clang may now be better in this regard than gcc: as can be seen from the `@t32_6_4` test, we do lower `x % 6 == 4` via this pattern, while gcc does not: https://godbolt.org/z/XNU2z9 And all the general alive proofs say this is legal. And manual checking agrees: https://rise4fun.com/Alive/WA2 Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=35479 | PR35479 ]]. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70053
This commit is contained in:
parent
3f46022e33
commit
96cf5c8d47
|
@ -4943,7 +4943,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
|
|||
ISD::CondCode Cond,
|
||||
DAGCombinerInfo &DCI,
|
||||
const SDLoc &DL) const {
|
||||
SmallVector<SDNode *, 4> Built;
|
||||
SmallVector<SDNode *, 5> Built;
|
||||
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
|
||||
DCI, DL, Built)) {
|
||||
for (SDNode *N : Built)
|
||||
|
@ -4978,6 +4978,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
|
|||
if (!isOperationLegalOrCustom(ISD::MUL, VT))
|
||||
return SDValue();
|
||||
|
||||
bool ComparingWithAllZeros = true;
|
||||
bool AllComparisonsWithNonZerosAreTautological = true;
|
||||
bool HadTautologicalLanes = false;
|
||||
bool AllLanesAreTautological = true;
|
||||
bool HadEvenDivisor = false;
|
||||
|
@ -4993,6 +4995,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
|
|||
const APInt &D = CDiv->getAPIntValue();
|
||||
const APInt &Cmp = CCmp->getAPIntValue();
|
||||
|
||||
ComparingWithAllZeros &= Cmp.isNullValue();
|
||||
|
||||
// `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
|
||||
// if C2 is not less than C1, the comparison is always false.
|
||||
// But we will only be able to produce the comparison that will give the
|
||||
|
@ -5000,12 +5004,6 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
|
|||
bool TautologicalInvertedLane = D.ule(Cmp);
|
||||
HadTautologicalInvertedLanes |= TautologicalInvertedLane;
|
||||
|
||||
// If we are checking that remainder is something smaller than the divisor,
|
||||
// then this comparison isn't tautological. For now this is not handled,
|
||||
// other than the comparison that remainder is zero.
|
||||
if (!Cmp.isNullValue() && !TautologicalInvertedLane)
|
||||
return false;
|
||||
|
||||
// If all lanes are tautological (either all divisors are ones, or divisor
|
||||
// is not greater than the constant we are comparing with),
|
||||
// we will prefer to avoid the fold.
|
||||
|
@ -5013,6 +5011,12 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
|
|||
HadTautologicalLanes |= TautologicalLane;
|
||||
AllLanesAreTautological &= TautologicalLane;
|
||||
|
||||
// If we are comparing with non-zero, we'll need to subtract said
|
||||
// comparison value from the LHS. But there is no point in doing that if
|
||||
// every lane where we are comparing with non-zero is tautological.
|
||||
if (!Cmp.isNullValue())
|
||||
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
|
||||
|
||||
// Decompose D into D0 * 2^K
|
||||
unsigned K = D.countTrailingZeros();
|
||||
assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
|
||||
|
@ -5033,8 +5037,15 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
|
|||
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
|
||||
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
|
||||
|
||||
// Q = floor((2^W - 1) / D)
|
||||
APInt Q = APInt::getAllOnesValue(W).udiv(D);
|
||||
// Q = floor((2^W - 1) u/ D)
|
||||
// R = ((2^W - 1) u% D)
|
||||
APInt Q, R;
|
||||
APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
|
||||
|
||||
// If we are comparing with zero, then that comparison constant is okay,
|
||||
// else it may need to be one less than that.
|
||||
if (Cmp.ugt(R))
|
||||
Q -= 1;
|
||||
|
||||
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
|
||||
"We are expecting that K is always less than all-ones for ShSVT");
|
||||
|
@ -5093,6 +5104,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
|
|||
QVal = QAmts[0];
|
||||
}
|
||||
|
||||
if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
|
||||
if (!isOperationLegalOrCustom(ISD::SUB, VT))
|
||||
return SDValue(); // FIXME: Could/should use `ISD::ADD`?
|
||||
assert(CompTargetNode.getValueType() == N.getValueType() &&
|
||||
"Expecting that the types on LHS and RHS of comparisons match.");
|
||||
N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
|
||||
}
|
||||
|
||||
// (mul N, P)
|
||||
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
|
||||
Created.push_back(Op0.getNode());
|
||||
|
|
|
@ -6,12 +6,10 @@ define i1 @t32_3_1(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #33
|
||||
; CHECK-NEXT: add w8, w8, w8, lsl #1
|
||||
; CHECK-NEXT: sub w8, w0, w8
|
||||
; CHECK-NEXT: cmp w8, #1 // =1
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #1431655765
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 3
|
||||
%cmp = icmp eq i32 %urem, 1
|
||||
|
@ -23,12 +21,11 @@ define i1 @t32_3_2(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #33
|
||||
; CHECK-NEXT: add w8, w8, w8, lsl #1
|
||||
; CHECK-NEXT: sub w8, w0, w8
|
||||
; CHECK-NEXT: cmp w8, #2 // =2
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #-1431655766
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #1431655765
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 3
|
||||
%cmp = icmp eq i32 %urem, 2
|
||||
|
@ -41,12 +38,10 @@ define i1 @t32_5_1(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #52429
|
||||
; CHECK-NEXT: movk w8, #52428, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: add w8, w8, w8, lsl #2
|
||||
; CHECK-NEXT: sub w8, w0, w8
|
||||
; CHECK-NEXT: cmp w8, #1 // =1
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #858993459
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 1
|
||||
|
@ -58,12 +53,11 @@ define i1 @t32_5_2(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #52429
|
||||
; CHECK-NEXT: movk w8, #52428, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: add w8, w8, w8, lsl #2
|
||||
; CHECK-NEXT: sub w8, w0, w8
|
||||
; CHECK-NEXT: cmp w8, #2 // =2
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #1717986918
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #858993459
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 2
|
||||
|
@ -75,12 +69,11 @@ define i1 @t32_5_3(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #52429
|
||||
; CHECK-NEXT: movk w8, #52428, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: add w8, w8, w8, lsl #2
|
||||
; CHECK-NEXT: sub w8, w0, w8
|
||||
; CHECK-NEXT: cmp w8, #3 // =3
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #-1717986919
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #858993459
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 3
|
||||
|
@ -92,12 +85,11 @@ define i1 @t32_5_4(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #52429
|
||||
; CHECK-NEXT: movk w8, #52428, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: add w8, w8, w8, lsl #2
|
||||
; CHECK-NEXT: sub w8, w0, w8
|
||||
; CHECK-NEXT: cmp w8, #4 // =4
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #-858993460
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #858993459
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 4
|
||||
|
@ -110,12 +102,13 @@ define i1 @t32_6_1(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: mov w9, #6
|
||||
; CHECK-NEXT: msub w8, w8, w9, w0
|
||||
; CHECK-NEXT: cmp w8, #1 // =1
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #1431655765
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #43691
|
||||
; CHECK-NEXT: ror w8, w8, #1
|
||||
; CHECK-NEXT: movk w9, #10922, lsl #16
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 1
|
||||
|
@ -127,12 +120,13 @@ define i1 @t32_6_2(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: mov w9, #6
|
||||
; CHECK-NEXT: msub w8, w8, w9, w0
|
||||
; CHECK-NEXT: cmp w8, #2 // =2
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w9, #-1431655766
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #43691
|
||||
; CHECK-NEXT: ror w8, w8, #1
|
||||
; CHECK-NEXT: movk w9, #10922, lsl #16
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 2
|
||||
|
@ -144,12 +138,13 @@ define i1 @t32_6_3(i32 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: mov w9, #6
|
||||
; CHECK-NEXT: msub w8, w8, w9, w0
|
||||
; CHECK-NEXT: cmp w8, #3 // =3
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mul w8, w0, w8
|
||||
; CHECK-NEXT: sub w8, w8, #1 // =1
|
||||
; CHECK-NEXT: mov w9, #43691
|
||||
; CHECK-NEXT: ror w8, w8, #1
|
||||
; CHECK-NEXT: movk w9, #10922, lsl #16
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 3
|
||||
|
@ -160,13 +155,15 @@ define i1 @t32_6_4(i32 %X) nounwind {
|
|||
; CHECK-LABEL: t32_6_4:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: mov w9, #21844
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: mov w9, #6
|
||||
; CHECK-NEXT: msub w8, w8, w9, w0
|
||||
; CHECK-NEXT: cmp w8, #4 // =4
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: movk w9, #21845, lsl #16
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #43690
|
||||
; CHECK-NEXT: ror w8, w8, #1
|
||||
; CHECK-NEXT: movk w9, #10922, lsl #16
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 4
|
||||
|
@ -177,13 +174,15 @@ define i1 @t32_6_5(i32 %X) nounwind {
|
|||
; CHECK-LABEL: t32_6_5:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: mov w9, #43689
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x8, w0, w8
|
||||
; CHECK-NEXT: lsr x8, x8, #34
|
||||
; CHECK-NEXT: mov w9, #6
|
||||
; CHECK-NEXT: msub w8, w8, w9, w0
|
||||
; CHECK-NEXT: cmp w8, #5 // =5
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: movk w9, #43690, lsl #16
|
||||
; CHECK-NEXT: madd w8, w0, w8, w9
|
||||
; CHECK-NEXT: mov w9, #43690
|
||||
; CHECK-NEXT: ror w8, w8, #1
|
||||
; CHECK-NEXT: movk w9, #10922, lsl #16
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 5
|
||||
|
@ -199,12 +198,11 @@ define i1 @t16_3_2(i16 %X) nounwind {
|
|||
; CHECK-NEXT: mov w9, #43691
|
||||
; CHECK-NEXT: and w8, w0, #0xffff
|
||||
; CHECK-NEXT: movk w9, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x9, w8, w9
|
||||
; CHECK-NEXT: lsr x9, x9, #33
|
||||
; CHECK-NEXT: add w9, w9, w9, lsl #1
|
||||
; CHECK-NEXT: sub w8, w8, w9
|
||||
; CHECK-NEXT: cmp w8, #2 // =2
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w10, #-1431655766
|
||||
; CHECK-NEXT: madd w8, w8, w9, w10
|
||||
; CHECK-NEXT: mov w9, #1431655765
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i16 %X, 3
|
||||
%cmp = icmp eq i16 %urem, 2
|
||||
|
@ -217,12 +215,11 @@ define i1 @t8_3_2(i8 %X) nounwind {
|
|||
; CHECK-NEXT: mov w9, #43691
|
||||
; CHECK-NEXT: and w8, w0, #0xff
|
||||
; CHECK-NEXT: movk w9, #43690, lsl #16
|
||||
; CHECK-NEXT: umull x9, w8, w9
|
||||
; CHECK-NEXT: lsr x9, x9, #33
|
||||
; CHECK-NEXT: add w9, w9, w9, lsl #1
|
||||
; CHECK-NEXT: sub w8, w8, w9
|
||||
; CHECK-NEXT: cmp w8, #2 // =2
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov w10, #-1431655766
|
||||
; CHECK-NEXT: madd w8, w8, w9, w10
|
||||
; CHECK-NEXT: mov w9, #1431655765
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i8 %X, 3
|
||||
%cmp = icmp eq i8 %urem, 2
|
||||
|
@ -234,12 +231,11 @@ define i1 @t64_3_2(i64 %X) nounwind {
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #-6148914691236517206
|
||||
; CHECK-NEXT: movk x8, #43691
|
||||
; CHECK-NEXT: umulh x8, x0, x8
|
||||
; CHECK-NEXT: lsr x8, x8, #1
|
||||
; CHECK-NEXT: add x8, x8, x8, lsl #1
|
||||
; CHECK-NEXT: sub x8, x0, x8
|
||||
; CHECK-NEXT: cmp x8, #2 // =2
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: mov x9, #-6148914691236517206
|
||||
; CHECK-NEXT: madd x8, x0, x8, x9
|
||||
; CHECK-NEXT: mov x9, #6148914691236517205
|
||||
; CHECK-NEXT: cmp x8, x9
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem i64 %X, 3
|
||||
%cmp = icmp eq i64 %urem, 2
|
||||
|
|
|
@ -4,18 +4,16 @@
|
|||
define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
|
||||
; CHECK-LABEL: t32_3:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI0_0
|
||||
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
|
||||
; CHECK-NEXT: adrp x9, .LCPI0_1
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1]
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: adrp x9, .LCPI0_0
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: dup v1.4s, w8
|
||||
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_0]
|
||||
; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
|
||||
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
|
||||
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
|
||||
; CHECK-NEXT: ushr v1.4s, v1.4s, #1
|
||||
; CHECK-NEXT: movi v3.4s, #3
|
||||
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
|
||||
; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
|
||||
; CHECK-NEXT: xtn v0.4h, v0.4s
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
|
||||
|
@ -26,18 +24,17 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
|
|||
define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
|
||||
; CHECK-LABEL: t32_5:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI1_0
|
||||
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
|
||||
; CHECK-NEXT: mov w8, #52429
|
||||
; CHECK-NEXT: movk w8, #52428, lsl #16
|
||||
; CHECK-NEXT: adrp x9, .LCPI1_0
|
||||
; CHECK-NEXT: dup v1.4s, w8
|
||||
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_0]
|
||||
; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
|
||||
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
|
||||
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
|
||||
; CHECK-NEXT: ushr v1.4s, v1.4s, #2
|
||||
; CHECK-NEXT: movi v3.4s, #5
|
||||
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
|
||||
; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: mov w9, #13106
|
||||
; CHECK-NEXT: movk w9, #13107, lsl #16
|
||||
; CHECK-NEXT: dup v2.4s, w8
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: dup v1.4s, w9
|
||||
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
|
||||
; CHECK-NEXT: xtn v0.4h, v0.4s
|
||||
; CHECK-NEXT: ret
|
||||
%urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
|
||||
|
|
|
@ -5,27 +5,18 @@
|
|||
define i1 @t32_3_1(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_3_1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: leal (%edx,%edx,2), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $1, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
|
||||
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_3_1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $33, %rcx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $1, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
|
||||
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 3
|
||||
%cmp = icmp eq i32 %urem, 1
|
||||
|
@ -35,27 +26,18 @@ define i1 @t32_3_1(i32 %X) nounwind {
|
|||
define i1 @t32_3_2(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_3_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: leal (%edx,%edx,2), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $2, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_3_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $33, %rcx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $2, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 3
|
||||
%cmp = icmp eq i32 %urem, 2
|
||||
|
@ -66,27 +48,18 @@ define i1 @t32_3_2(i32 %X) nounwind {
|
|||
define i1 @t32_5_1(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_5_1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl $2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,4), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $1, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: addl $858993459, %eax # imm = 0x33333333
|
||||
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_5_1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: leal (%rcx,%rcx,4), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $1, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: addl $858993459, %eax # imm = 0x33333333
|
||||
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 1
|
||||
|
@ -96,27 +69,18 @@ define i1 @t32_5_1(i32 %X) nounwind {
|
|||
define i1 @t32_5_2(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_5_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl $2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,4), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $2, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: addl $1717986918, %eax # imm = 0x66666666
|
||||
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_5_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: leal (%rcx,%rcx,4), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $2, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: addl $1717986918, %eax # imm = 0x66666666
|
||||
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 2
|
||||
|
@ -126,27 +90,18 @@ define i1 @t32_5_2(i32 %X) nounwind {
|
|||
define i1 @t32_5_3(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_5_3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl $2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,4), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $3, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: addl $-1717986919, %eax # imm = 0x99999999
|
||||
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_5_3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: leal (%rcx,%rcx,4), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $3, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: addl $-1717986919, %eax # imm = 0x99999999
|
||||
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 3
|
||||
|
@ -156,27 +111,18 @@ define i1 @t32_5_3(i32 %X) nounwind {
|
|||
define i1 @t32_5_4(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_5_4:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl $2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,4), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $4, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
|
||||
; X86-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_5_4:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: leal (%rcx,%rcx,4), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $4, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
|
||||
; X64-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 5
|
||||
%cmp = icmp eq i32 %urem, 4
|
||||
|
@ -187,29 +133,20 @@ define i1 @t32_5_4(i32 %X) nounwind {
|
|||
define i1 @t32_6_1(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_6_1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: andl $-2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,2), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $1, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
|
||||
; X86-NEXT: rorl %eax
|
||||
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_6_1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: addl %ecx, %ecx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $1, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
|
||||
; X64-NEXT: rorl %eax
|
||||
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 1
|
||||
|
@ -219,29 +156,20 @@ define i1 @t32_6_1(i32 %X) nounwind {
|
|||
define i1 @t32_6_2(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_6_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: andl $-2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,2), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $2, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: rorl %eax
|
||||
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_6_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: addl %ecx, %ecx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $2, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X64-NEXT: rorl %eax
|
||||
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 2
|
||||
|
@ -251,29 +179,20 @@ define i1 @t32_6_2(i32 %X) nounwind {
|
|||
define i1 @t32_6_3(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_6_3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: andl $-2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,2), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $3, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: decl %eax
|
||||
; X86-NEXT: rorl %eax
|
||||
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_6_3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: addl %ecx, %ecx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $3, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: decl %eax
|
||||
; X64-NEXT: rorl %eax
|
||||
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 3
|
||||
|
@ -283,29 +202,20 @@ define i1 @t32_6_3(i32 %X) nounwind {
|
|||
define i1 @t32_6_4(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_6_4:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: andl $-2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,2), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $4, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: addl $1431655764, %eax # imm = 0x55555554
|
||||
; X86-NEXT: rorl %eax
|
||||
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_6_4:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: addl %ecx, %ecx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $4, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: addl $1431655764, %eax # imm = 0x55555554
|
||||
; X64-NEXT: rorl %eax
|
||||
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 4
|
||||
|
@ -315,29 +225,20 @@ define i1 @t32_6_4(i32 %X) nounwind {
|
|||
define i1 @t32_6_5(i32 %X) nounwind {
|
||||
; X86-LABEL: t32_6_5:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: andl $-2, %edx
|
||||
; X86-NEXT: leal (%edx,%edx,2), %eax
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: cmpl $5, %ecx
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
|
||||
; X86-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
|
||||
; X86-NEXT: rorl %eax
|
||||
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t32_6_5:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: shrq $34, %rcx
|
||||
; X64-NEXT: addl %ecx, %ecx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpl $5, %edi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
|
||||
; X64-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
|
||||
; X64-NEXT: rorl %eax
|
||||
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i32 %X, 6
|
||||
%cmp = icmp eq i32 %urem, 5
|
||||
|
@ -350,24 +251,20 @@ define i1 @t32_6_5(i32 %X) nounwind {
|
|||
define i1 @t16_3_2(i16 %X) nounwind {
|
||||
; X86-LABEL: t16_3_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: imull $43691, %eax, %ecx # imm = 0xAAAB
|
||||
; X86-NEXT: shrl $17, %ecx
|
||||
; X86-NEXT: leal (%ecx,%ecx,2), %ecx
|
||||
; X86-NEXT: subl %ecx, %eax
|
||||
; X86-NEXT: cmpw $2, %ax
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-21845, {{[0-9]+}}(%esp), %eax # imm = 0xAAAB
|
||||
; X86-NEXT: addl $-21846, %eax # imm = 0xAAAA
|
||||
; X86-NEXT: movzwl %ax, %eax
|
||||
; X86-NEXT: cmpl $21845, %eax # imm = 0x5555
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t16_3_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movzwl %di, %eax
|
||||
; X64-NEXT: imull $43691, %eax, %eax # imm = 0xAAAB
|
||||
; X64-NEXT: shrl $17, %eax
|
||||
; X64-NEXT: leal (%rax,%rax,2), %eax
|
||||
; X64-NEXT: subl %eax, %edi
|
||||
; X64-NEXT: cmpw $2, %di
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-21845, %edi, %eax # imm = 0xAAAB
|
||||
; X64-NEXT: addl $-21846, %eax # imm = 0xAAAA
|
||||
; X64-NEXT: movzwl %ax, %eax
|
||||
; X64-NEXT: cmpl $21845, %eax # imm = 0x5555
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i16 %X, 3
|
||||
%cmp = icmp eq i16 %urem, 2
|
||||
|
@ -377,24 +274,18 @@ define i1 @t16_3_2(i16 %X) nounwind {
|
|||
define i1 @t8_3_2(i8 %X) nounwind {
|
||||
; X86-LABEL: t8_3_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: imull $171, %eax, %ecx
|
||||
; X86-NEXT: shrl $9, %ecx
|
||||
; X86-NEXT: leal (%ecx,%ecx,2), %ecx
|
||||
; X86-NEXT: subb %cl, %al
|
||||
; X86-NEXT: cmpb $2, %al
|
||||
; X86-NEXT: sete %al
|
||||
; X86-NEXT: imull $-85, {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: addb $-86, %al
|
||||
; X86-NEXT: cmpb $85, %al
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t8_3_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movzbl %dil, %eax
|
||||
; X64-NEXT: imull $171, %eax, %ecx
|
||||
; X64-NEXT: shrl $9, %ecx
|
||||
; X64-NEXT: leal (%rcx,%rcx,2), %ecx
|
||||
; X64-NEXT: subb %cl, %al
|
||||
; X64-NEXT: cmpb $2, %al
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: imull $-85, %edi, %eax
|
||||
; X64-NEXT: addb $-86, %al
|
||||
; X64-NEXT: cmpb $85, %al
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i8 %X, 3
|
||||
%cmp = icmp eq i8 %urem, 2
|
||||
|
@ -419,14 +310,13 @@ define i1 @t64_3_2(i64 %X) nounwind {
|
|||
;
|
||||
; X64-LABEL: t64_3_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: mulq %rcx
|
||||
; X64-NEXT: shrq %rdx
|
||||
; X64-NEXT: leaq (%rdx,%rdx,2), %rax
|
||||
; X64-NEXT: subq %rax, %rdi
|
||||
; X64-NEXT: cmpq $2, %rdi
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: movabsq $-6148914691236517205, %rax # imm = 0xAAAAAAAAAAAAAAAB
|
||||
; X64-NEXT: imulq %rdi, %rax
|
||||
; X64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
|
||||
; X64-NEXT: addq %rax, %rcx
|
||||
; X64-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
|
||||
; X64-NEXT: cmpq %rax, %rcx
|
||||
; X64-NEXT: setb %al
|
||||
; X64-NEXT: retq
|
||||
%urem = urem i64 %X, 3
|
||||
%cmp = icmp eq i64 %urem, 2
|
||||
|
|
|
@ -8,77 +8,52 @@
|
|||
define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
|
||||
; CHECK-SSE2-LABEL: t32_3:
|
||||
; CHECK-SSE2: # %bb.0:
|
||||
; CHECK-SSE2-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
|
||||
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
|
||||
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; CHECK-SSE2-NEXT: psrld $1, %xmm2
|
||||
; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
|
||||
; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm3
|
||||
; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm3
|
||||
; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: pxor %xmm3, %xmm0
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE41-LABEL: t32_3:
|
||||
; CHECK-SSE41: # %bb.0:
|
||||
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
|
||||
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
|
||||
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
|
||||
; CHECK-SSE41-NEXT: psrld $1, %xmm2
|
||||
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
|
||||
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
|
||||
; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE41-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,1431655764,1431655764,1431655764]
|
||||
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
|
||||
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
|
||||
; CHECK-SSE41-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX1-LABEL: t32_3:
|
||||
; CHECK-AVX1: # %bb.0:
|
||||
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
|
||||
; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX2-LABEL: t32_3:
|
||||
; CHECK-AVX2: # %bb.0:
|
||||
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
|
||||
; CHECK-AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
|
||||
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX512VL-LABEL: t32_3:
|
||||
; CHECK-AVX512VL: # %bb.0:
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
|
||||
; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: retq
|
||||
%urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
|
||||
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
|
||||
|
@ -88,77 +63,53 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
|
|||
define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
|
||||
; CHECK-SSE2-LABEL: t32_5:
|
||||
; CHECK-SSE2: # %bb.0:
|
||||
; CHECK-SSE2-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
|
||||
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
|
||||
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
|
||||
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; CHECK-SSE2-NEXT: psrld $2, %xmm2
|
||||
; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; CHECK-SSE2-NEXT: pslld $2, %xmm1
|
||||
; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
|
||||
; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm3
|
||||
; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm3
|
||||
; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: pxor %xmm3, %xmm0
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE41-LABEL: t32_5:
|
||||
; CHECK-SSE41: # %bb.0:
|
||||
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
|
||||
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
|
||||
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
|
||||
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
|
||||
; CHECK-SSE41-NEXT: psrld $2, %xmm2
|
||||
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
|
||||
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
|
||||
; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE41-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
|
||||
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,858993458,858993458]
|
||||
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
|
||||
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
|
||||
; CHECK-SSE41-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX1-LABEL: t32_5:
|
||||
; CHECK-AVX1: # %bb.0:
|
||||
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
|
||||
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
|
||||
; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
|
||||
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX1-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX2-LABEL: t32_5:
|
||||
; CHECK-AVX2: # %bb.0:
|
||||
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
|
||||
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
|
||||
; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5]
|
||||
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
|
||||
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [858993458,858993458,858993458,858993458]
|
||||
; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
|
||||
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX2-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX512VL-LABEL: t32_5:
|
||||
; CHECK-AVX512VL: # %bb.0:
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
|
||||
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: retq
|
||||
%urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
|
||||
%cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4>
|
||||
|
@ -233,16 +184,11 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
|
|||
;
|
||||
; CHECK-AVX512VL-LABEL: t32_6_part0:
|
||||
; CHECK-AVX512VL: # %bb.0:
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
|
||||
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: retq
|
||||
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
|
||||
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -317,16 +263,11 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
|
|||
;
|
||||
; CHECK-AVX512VL-LABEL: t32_6_part1:
|
||||
; CHECK-AVX512VL: # %bb.0:
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
|
||||
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: retq
|
||||
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
|
||||
%cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0>
|
||||
|
@ -415,18 +356,12 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
|
|||
;
|
||||
; CHECK-AVX512VL-LABEL: t32_tautological:
|
||||
; CHECK-AVX512VL: # %bb.0:
|
||||
; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
|
||||
; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
|
||||
; CHECK-AVX512VL-NEXT: retq
|
||||
%urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3>
|
||||
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
|
||||
|
|
Loading…
Reference in New Issue