[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479)
Summary:
This is a preparatory cleanup before I add more of this fold to deal with comparisons with non-zero.

In essence, the current lowering is:
```
Name: (X % C1) == 0 -> X * C3 <= C4
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, 0
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%r = icmp ule i8 %n3, %C4
```
https://rise4fun.com/Alive/oqd

It kinda just works; there are really no weird edge cases. But it isn't all that great when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3

That case is tautological, always-false:
```
Name: (X % Y) u>= Y
%o0 = urem i8 %x, %y
%r = icmp uge i8 %o0, %y
  =>
%r = false
```
https://rise4fun.com/Alive/ofu

While we can't/shouldn't get such a tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes.

There are two lowering variants:

1. We can blend between the computed result and the correct tautological result:
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%res = icmp ule i8 %n3, %C4
%r = select i1 %is_tautologically_false, i1 0, i1 %res
```
https://rise4fun.com/Alive/PjT5
https://rise4fun.com/Alive/1KV

2. We can invert the comparison result:
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n3, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/2xC
https://rise4fun.com/Alive/jpb5

3. We can expand into `and`/`or`:
https://rise4fun.com/Alive/WGn
https://rise4fun.com/Alive/lcb5

The blend variant is likely the best, since we avoid having to load the replacement from the constant pool. `xor` is second best, since it's still pretty general. I'm not adding the `and`/`or` variants.

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: nick, hiraditya, xbolva00, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70051
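To make the arithmetic concrete, here is a minimal standalone C++ check of the base rewrite (an editorial illustration, not part of the patch), instantiated for i8 and C1 = 6: then K = countTrailingZeros(6) = 1, C3 = 171 (the multiplicative inverse of 3 modulo 2^8), and C4 = 0xFF u/ 6 = 42.
```
#include <cassert>
#include <cstdint>

// Rotate an i8 right by K bits; the (8 - K) % 8 keeps the left-shift
// amount in range when K == 0.
static uint8_t rotr8(uint8_t V, unsigned K) {
  return (uint8_t)((V >> K) | (V << ((8 - K) % 8)));
}

int main() {
  for (unsigned X = 0; X <= 255; ++X) {
    bool Rem = (uint8_t)X % 6 == 0;                  // x u% C1 == 0
    bool Fold = rotr8((uint8_t)(X * 171), 1) <= 42;  // rotr(x * C3, K) u<= C4
    assert(Rem == Fold);
  }
}
```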
commit 3f46022e33 (parent 06e03bce80)
@@ -4943,7 +4943,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                         ISD::CondCode Cond,
                                         DAGCombinerInfo &DCI,
                                         const SDLoc &DL) const {
-  SmallVector<SDNode *, 2> Built;
+  SmallVector<SDNode *, 4> Built;
   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                          DCI, DL, Built)) {
     for (SDNode *N : Built)
@@ -4978,26 +4978,40 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
   if (!isOperationLegalOrCustom(ISD::MUL, VT))
     return SDValue();
 
-  // TODO: Could support comparing with non-zero too.
-  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
-  if (!CompTarget || !CompTarget->isNullValue())
-    return SDValue();
-
-  bool HadOneDivisor = false;
-  bool AllDivisorsAreOnes = true;
+  bool HadTautologicalLanes = false;
+  bool AllLanesAreTautological = true;
   bool HadEvenDivisor = false;
   bool AllDivisorsArePowerOfTwo = true;
-  SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+  bool HadTautologicalInvertedLanes = false;
+  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
 
-  auto BuildUREMPattern = [&](ConstantSDNode *C) {
+  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
-    if (C->isNullValue())
+    if (CDiv->isNullValue())
       return false;
 
-    const APInt &D = C->getAPIntValue();
-    // If all divisors are ones, we will prefer to avoid the fold.
-    HadOneDivisor |= D.isOneValue();
-    AllDivisorsAreOnes &= D.isOneValue();
+    const APInt &D = CDiv->getAPIntValue();
+    const APInt &Cmp = CCmp->getAPIntValue();
+
+    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+    // if C2 is not less than C1, the comparison is always false.
+    // But we will only be able to produce the comparison that will give the
+    // opposite tautological answer. So this lane would need to be fixed up.
+    bool TautologicalInvertedLane = D.ule(Cmp);
+    HadTautologicalInvertedLanes |= TautologicalInvertedLane;
+
+    // If we are checking that the remainder is something smaller than the
+    // divisor, then this comparison isn't tautological. For now this is not
+    // handled, other than the comparison that the remainder is zero.
+    if (!Cmp.isNullValue() && !TautologicalInvertedLane)
+      return false;
+
+    // If all lanes are tautological (either all divisors are ones, or the
+    // divisor is not greater than the constant we are comparing with),
+    // we will prefer to avoid the fold.
+    bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+    HadTautologicalLanes |= TautologicalLane;
+    AllLanesAreTautological &= TautologicalLane;
 
     // Decompose D into D0 * 2^K
     unsigned K = D.countTrailingZeros();
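For illustration, here is a plain-integer mirror of the lane classification performed by BuildUREMPattern above, applied to the divisor/comparand vectors used in the tests further down (a sketch of the logic only; the real code works on APInt):
```
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t D[4] = {3, 1, 1, 9};     // urem <4 x i32> %X, <3, 1, 1, 9>
  uint32_t Cmp[4] = {0, 42, 0, 42}; // icmp eq ..., <0, 42, 0, 42>
  for (int I = 0; I != 4; ++I) {
    bool Inverted = D[I] <= Cmp[I]; // D.ule(Cmp): `x u% D == Cmp` is always false
    bool Tautological = D[I] == 1 || Inverted;
    printf("lane %d: tautological=%d inverted=%d\n", I, Tautological, Inverted);
  }
  // Prints 0/0, 1/1, 1/0, 1/1: only lane 0 needs the real
  // multiply-rotate-compare sequence; lanes 1 and 3 need a fixup.
}
```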
@@ -5025,13 +5039,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
     assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
            "We are expecting that K is always less than all-ones for ShSVT");
 
-    // If the divisor is 1 the result can be constant-folded.
-    if (D.isOneValue()) {
+    // If the lane is tautological the result can be constant-folded.
+    if (TautologicalLane) {
       // Set P and K amount to bogus values so we can try to splat them.
       P = 0;
       K = -1;
-      assert(Q.isAllOnesValue() &&
-             "Expecting all-ones comparison for one divisor");
+      // And ensure that the comparison constant is tautological,
+      // i.e. it will always compare true/false.
+      Q = -1;
     }
 
     PAmts.push_back(DAG.getConstant(P, DL, SVT));
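A quick standalone check (illustration only) of why Q = -1 is the right tautological comparison constant: every possible product compares u<= all-ones, so a SETULE lane folds to all-true and a SETUGT lane to all-false.
```
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V <= 255; ++V) {
    assert(((uint8_t)V <= 0xFF) == true);  // SETULE vs. Q = -1: deliberately tautological
    assert(((uint8_t)V > 0xFF) == false);  // SETUGT vs. Q = -1
  }
}
```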
@@ -5045,11 +5060,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
   SDValue D = REMNode.getOperand(1);
 
   // Collect the values from each element.
-  if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
     return SDValue();
 
-  // If this is a urem by a one, avoid the fold since it can be constant-folded.
-  if (AllDivisorsAreOnes)
+  // If all lanes are tautological, the result can be constant-folded.
+  if (AllLanesAreTautological)
     return SDValue();
 
   // If this is a urem by a powers-of-two, avoid the fold since it can be
@@ -5059,7 +5074,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
 
   SDValue PVal, KVal, QVal;
   if (VT.isVector()) {
-    if (HadOneDivisor) {
+    if (HadTautologicalLanes) {
       // Try to turn PAmts into a splat, since we don't care about the values
       // that are currently '0'. If we can't, just keep '0's.
       turnVectorIntoSplatVector(PAmts, isNullConstant);
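A hedged plain-integer sketch of what that splat attempt does, with semantics inferred from the comment above (the real turnVectorIntoSplatVector operates on SDValue build-vector constants): if every lane that is not the '0' placeholder holds one and the same value, the placeholder lanes are rewritten to that value.
```
#include <cstdint>
#include <vector>

static bool trySplat(std::vector<uint32_t> &Lanes) {
  bool SeenValue = false;
  uint32_t Splat = 0;
  for (uint32_t L : Lanes) {
    if (L == 0)
      continue;                  // placeholder lane, free to become anything
    if (SeenValue && Splat != L)
      return false;              // two distinct meaningful values: keep the '0's
    Splat = L;
    SeenValue = true;
  }
  if (!SeenValue)
    return false;                // nothing meaningful to splat
  for (uint32_t &L : Lanes)
    L = Splat;
  return true;
}

int main() {
  // PAmts for the <3,1,1,9> vs. <0,42,0,42> test: lanes 1-3 are all
  // tautological, so {0xAAAAAAAB, 0, 0, 0} splats to 0xAAAAAAAB -- the
  // broadcast constant visible in the updated tests below.
  std::vector<uint32_t> P = {0xAAAAAAABu, 0, 0, 0};
  bool Ok = trySplat(P);
  (void)Ok;
}
```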
@@ -5096,8 +5111,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
   }
 
   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
-  return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
-                      ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+  SDValue NewCC =
+      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+  if (!HadTautologicalInvertedLanes)
+    return NewCC;
+
+  // If any lanes previously compared always-false, the NewCC will give
+  // an always-true result for them, so we need to fix up those lanes.
+  // Or the other way around for the inequality predicate.
+  assert(VT.isVector() && "Can/should only get here for vectors.");
+  Created.push_back(NewCC.getNode());
+
+  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+  // if C2 is not less than C1, the comparison is always false.
+  // But we have produced the comparison that will give the
+  // opposite tautological answer. So these lanes would need to be fixed up.
+  SDValue TautologicalInvertedChannels =
+      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
+  Created.push_back(TautologicalInvertedChannels.getNode());
+
+  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
+    // If we have a vector select, let's replace the comparison results in the
+    // affected lanes with the correct tautological result.
+    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
+                                              DL, SETCCVT, SETCCVT);
+    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
+                       Replacement, NewCC);
+  }
+
+  // Else, we can just invert the comparison result in the appropriate lanes.
+  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
+    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
+                       TautologicalInvertedChannels);
+
+  return SDValue(); // Don't know how to lower.
 }
 
 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
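Before the updated tests, a boolean-level sketch of the two fixup strategies the code above picks between (names are illustrative, not LLVM API): the VSELECT variant blends the correct tautological answer into the affected lanes, the XOR variant flips them.
```
#include <cassert>

// On an always-false lane, NewCC (the setule/setugt of the rotated product)
// yields the opposite, wrong answer; both fixups correct exactly those lanes.
static bool fixupBlend(bool NewCC, bool LaneInverted, bool IsSetEq) {
  bool Replacement = !IsSetEq;               // correct tautological answer
  return LaneInverted ? Replacement : NewCC; // models ISD::VSELECT
}

static bool fixupXor(bool NewCC, bool LaneInverted) {
  return NewCC != LaneInverted;              // models ISD::XOR
}

int main() {
  // SETEQ on an inverted lane: NewCC says true, the right answer is false.
  assert(fixupBlend(true, true, true) == false);
  assert(fixupXor(true, true) == false);
  // SETNE on an inverted lane: NewCC says false, the right answer is true.
  assert(fixupBlend(false, true, false) == true);
  assert(fixupXor(false, true) == true);
  // Non-inverted lanes pass through unchanged under both strategies.
  assert(fixupBlend(true, false, true) && fixupXor(true, false));
}
```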
@@ -22,23 +22,14 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI1_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    adrp x8, .LCPI1_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI1_1]
-; CHECK-NEXT:    adrp x8, .LCPI1_2
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI1_2]
-; CHECK-NEXT:    adrp x8, .LCPI1_3
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI1_3]
-; CHECK-NEXT:    adrp x8, .LCPI1_4
-; CHECK-NEXT:    umull2 v5.2d, v0.4s, v1.4s
-; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT:    neg v2.4s, v2.4s
-; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v5.4s
-; CHECK-NEXT:    ldr q5, [x8, :lo12:.LCPI1_4]
-; CHECK-NEXT:    ushl v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    bsl v3.16b, v0.16b, v1.16b
-; CHECK-NEXT:    mls v0.4s, v3.4s, v4.4s
-; CHECK-NEXT:    cmeq v0.4s, v0.4s, v5.4s
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    dup v2.4s, w8
+; CHECK-NEXT:    mul v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    movi d1, #0xffff0000ffff0000
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X, <i32 3, i32 1, i32 1, i32 9>
   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 42, i32 0, i32 42>
@@ -50,24 +41,14 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI2_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT:    adrp x8, .LCPI2_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI2_1]
-; CHECK-NEXT:    adrp x8, .LCPI2_2
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI2_2]
-; CHECK-NEXT:    adrp x8, .LCPI2_3
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI2_3]
-; CHECK-NEXT:    adrp x8, .LCPI2_4
-; CHECK-NEXT:    umull2 v5.2d, v0.4s, v1.4s
-; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT:    neg v2.4s, v2.4s
-; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v5.4s
-; CHECK-NEXT:    ldr q5, [x8, :lo12:.LCPI2_4]
-; CHECK-NEXT:    ushl v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    bsl v3.16b, v0.16b, v1.16b
-; CHECK-NEXT:    mls v0.4s, v3.4s, v4.4s
-; CHECK-NEXT:    cmeq v0.4s, v0.4s, v5.4s
-; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    dup v2.4s, w8
+; CHECK-NEXT:    mul v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    cmhi v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    movi d1, #0xffff0000ffff0000
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X, <i32 3, i32 1, i32 1, i32 9>
   %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 42, i32 0, i32 42>
@@ -79,25 +60,13 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT:    adrp x8, .LCPI3_1
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI3_1]
-; CHECK-NEXT:    adrp x8, .LCPI3_2
-; CHECK-NEXT:    umull2 v4.4s, v0.8h, v1.8h
-; CHECK-NEXT:    umull v1.4s, v0.4h, v1.4h
-; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v4.8h
-; CHECK-NEXT:    neg v3.8h, v3.8h
-; CHECK-NEXT:    movi v2.2d, #0xffff00000000ffff
-; CHECK-NEXT:    ushl v1.8h, v1.8h, v3.8h
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI3_2]
-; CHECK-NEXT:    adrp x8, .LCPI3_3
-; CHECK-NEXT:    movi v4.2d, #0x00ffffffff0000
-; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_3]
-; CHECK-NEXT:    and v4.16b, v0.16b, v4.16b
-; CHECK-NEXT:    orr v1.16b, v4.16b, v1.16b
-; CHECK-NEXT:    mls v0.8h, v1.8h, v3.8h
-; CHECK-NEXT:    cmeq v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    mul v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    cmhs v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    movi d1, #0xffff0000ffff0000
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ret
   %urem = urem <8 x i16> %X, <i16 3, i16 1, i16 1, i16 9, i16 3, i16 1, i16 1, i16 9>
   %cmp = icmp eq <8 x i16> %urem, <i16 0, i16 0, i16 42, i16 42, i16 0, i16 0, i16 42, i16 42>
@@ -108,18 +77,19 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
 ; CHECK-LABEL: t3_wide:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x9, #-6148914691236517206
-; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    adrp x11, .LCPI4_0
+; CHECK-NEXT:    mov x8, v0.d[1]
 ; CHECK-NEXT:    movk x9, #43691
-; CHECK-NEXT:    adrp x10, .LCPI4_0
-; CHECK-NEXT:    umulh x9, x8, x9
-; CHECK-NEXT:    ldr q0, [x10, :lo12:.LCPI4_0]
-; CHECK-NEXT:    lsr x9, x9, #1
-; CHECK-NEXT:    add x9, x9, x9, lsl #1
-; CHECK-NEXT:    sub x8, x8, x9
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    mov v1.d[0], x8
-; CHECK-NEXT:    cmeq v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov x10, d0
+; CHECK-NEXT:    ldr q0, [x11, :lo12:.LCPI4_0]
+; CHECK-NEXT:    mul x10, x10, x9
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    fmov d1, x10
+; CHECK-NEXT:    mov v1.d[1], x8
+; CHECK-NEXT:    cmhs v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    movi d1, #0xffffffff00000000
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ret
   %urem = urem <2 x i64> %X, <i64 3, i64 1>
   %cmp = icmp eq <2 x i64> %urem, <i64 0, i64 42>
@@ -25,94 +25,54 @@ define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind {
 define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
 ; CHECK-SSE2-LABEL: t1_all_odd_eq:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
-; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; CHECK-SSE2-NEXT:    psrld $1, %xmm2
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[3,3]
-; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [3,1,1,9]
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,2,3,3]
-; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm4
-; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,2],xmm2[0,3]
-; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[2,0,1,3]
-; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm4
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
-; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-SSE41-LABEL: t1_all_odd_eq:
 ; CHECK-SSE41:       # %bb.0:
-; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
-; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
-; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; CHECK-SSE41-NEXT:    psrld $1, %xmm1
-; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
-; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
-; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
-; CHECK-SSE41-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1431655765,4294967295,4294967295,4294967295]
+; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
+; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; CHECK-SSE41-NEXT:    retq
 ;
 ; CHECK-AVX1-LABEL: t1_all_odd_eq:
 ; CHECK-AVX1:       # %bb.0:
-; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
-; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
-; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; CHECK-AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
-; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; CHECK-AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; CHECK-AVX1-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: t1_all_odd_eq:
 ; CHECK-AVX2:       # %bb.0:
-; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
-; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
-; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; CHECK-AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
-; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3]
-; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; CHECK-AVX2-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512VL-LABEL: t1_all_odd_eq:
 ; CHECK-AVX512VL:       # %bb.0:
-; CHECK-AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
-; CHECK-AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
-; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; CHECK-AVX512VL-NEXT:    vpsrld $1, %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3]
-; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; CHECK-AVX512VL-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; CHECK-AVX512VL-NEXT:    retq
   %urem = urem <4 x i32> %X, <i32 3, i32 1, i32 1, i32 9>
   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 42, i32 0, i32 42>
@@ -122,103 +82,60 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
 define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
 ; CHECK-SSE2-LABEL: t1_all_odd_ne:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
-; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; CHECK-SSE2-NEXT:    psrld $1, %xmm2
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[3,3]
-; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [3,1,1,9]
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,2,3,3]
-; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm4
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm4
-; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,2],xmm2[0,3]
-; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[2,0,1,3]
-; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm4
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
-; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-SSE41-LABEL: t1_all_odd_ne:
 ; CHECK-SSE41:       # %bb.0:
-; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
-; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
-; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; CHECK-SSE41-NEXT:    psrld $1, %xmm1
-; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
-; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
-; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
-; CHECK-SSE41-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1431655765,4294967295,4294967295,4294967295]
+; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
+; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
 ; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
 ; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm0
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; CHECK-SSE41-NEXT:    retq
 ;
 ; CHECK-AVX1-LABEL: t1_all_odd_ne:
 ; CHECK-AVX1:       # %bb.0:
-; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
-; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
-; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; CHECK-AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
-; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; CHECK-AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; CHECK-AVX1-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: t1_all_odd_ne:
 ; CHECK-AVX2:       # %bb.0:
-; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
-; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
-; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; CHECK-AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
-; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3]
-; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; CHECK-AVX2-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512VL-LABEL: t1_all_odd_ne:
 ; CHECK-AVX512VL:       # %bb.0:
-; CHECK-AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm1 = [2863311531,0,0,954437177]
-; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
-; CHECK-AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
-; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; CHECK-AVX512VL-NEXT:    vpsrld $1, %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3]
-; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; CHECK-AVX512VL-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-AVX512VL-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; CHECK-AVX512VL-NEXT:    retq
   %urem = urem <4 x i32> %X, <i32 3, i32 1, i32 1, i32 9>
   %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 42, i32 0, i32 42>
@@ -228,72 +145,48 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
 define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind {
 ; CHECK-SSE2-LABEL: t2_narrow:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,65535,65535,0,0,65535,65535,0]
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-SSE2-NEXT:    pand %xmm1, %xmm2
-; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [43691,0,0,58255,43691,0,0,58255]
-; CHECK-SSE2-NEXT:    pmulhuw %xmm0, %xmm3
-; CHECK-SSE2-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-SSE2-NEXT:    psrlw $3, %xmm4
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
-; CHECK-SSE2-NEXT:    psrlw $1, %xmm3
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; CHECK-SSE2-NEXT:    pandn %xmm3, %xmm1
-; CHECK-SSE2-NEXT:    por %xmm2, %xmm1
-; CHECK-SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm1
-; CHECK-SSE2-NEXT:    psubw %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    pcmpeqw {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    psubusw {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-SSE41-LABEL: t2_narrow:
 ; CHECK-SSE41:       # %bb.0:
-; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [43691,0,0,58255,43691,0,0,58255]
-; CHECK-SSE41-NEXT:    pmulhuw %xmm0, %xmm1
-; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
-; CHECK-SSE41-NEXT:    psrlw $3, %xmm2
-; CHECK-SSE41-NEXT:    psrlw $1, %xmm1
-; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
-; CHECK-SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm1
-; CHECK-SSE41-NEXT:    psubw %xmm1, %xmm0
-; CHECK-SSE41-NEXT:    pcmpeqw {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [21845,65535,65535,65535,21845,65535,65535,65535]
+; CHECK-SSE41-NEXT:    pminuw %xmm0, %xmm1
+; CHECK-SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
+; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; CHECK-SSE41-NEXT:    retq
 ;
 ; CHECK-AVX1-LABEL: t2_narrow:
 ; CHECK-AVX1:       # %bb.0:
-; CHECK-AVX1-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
-; CHECK-AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm2
-; CHECK-AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
-; CHECK-AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
-; CHECK-AVX1-NEXT:    vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; CHECK-AVX1-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: t2_narrow:
 ; CHECK-AVX2:       # %bb.0:
-; CHECK-AVX2-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
-; CHECK-AVX2-NEXT:    vpsrlw $3, %xmm1, %xmm2
-; CHECK-AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; CHECK-AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
-; CHECK-AVX2-NEXT:    vpmullw {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX2-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
-; CHECK-AVX2-NEXT:    vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512VL-LABEL: t2_narrow:
 ; CHECK-AVX512VL:       # %bb.0:
-; CHECK-AVX512VL-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
-; CHECK-AVX512VL-NEXT:    vpsrlw $3, %xmm1, %xmm2
-; CHECK-AVX512VL-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; CHECK-AVX512VL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
-; CHECK-AVX512VL-NEXT:    vpmullw {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
-; CHECK-AVX512VL-NEXT:    vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; CHECK-AVX512VL-NEXT:    retq
   %urem = urem <8 x i16> %X, <i16 3, i16 1, i16 1, i16 9, i16 3, i16 1, i16 1, i16 9>
   %cmp = icmp eq <8 x i16> %urem, <i16 0, i16 0, i16 42, i16 42, i16 0, i16 0, i16 42, i16 42>
@@ -301,46 +194,81 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind {
 }
 
 define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
-; CHECK-SSE2-LABEL: t3_wide:
-; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movq %xmm0, %rcx
-; CHECK-SSE2-NEXT:    movabsq $-6148914691236517205, %rdx # imm = 0xAAAAAAAAAAAAAAAB
-; CHECK-SSE2-NEXT:    movq %rcx, %rax
-; CHECK-SSE2-NEXT:    mulq %rdx
-; CHECK-SSE2-NEXT:    shrq %rdx
-; CHECK-SSE2-NEXT:    leaq (%rdx,%rdx,2), %rax
-; CHECK-SSE2-NEXT:    subq %rax, %rcx
-; CHECK-SSE2-NEXT:    movq %rcx, %xmm1
-; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm1
-; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
-; CHECK-SSE2-NEXT:    pand %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; CHECK-SSE-LABEL: t3_wide:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
+; CHECK-SSE-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE-NEXT:    movdqa %xmm0, %xmm3
+; CHECK-SSE-NEXT:    psrlq $32, %xmm3
+; CHECK-SSE-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE-NEXT:    pmuludq {{.*}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    paddq %xmm3, %xmm0
+; CHECK-SSE-NEXT:    psllq $32, %xmm0
+; CHECK-SSE-NEXT:    paddq %xmm2, %xmm0
+; CHECK-SSE-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [15372286730238776661,9223372034707292159]
+; CHECK-SSE-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE-NEXT:    pcmpgtd %xmm1, %xmm2
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; CHECK-SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-SSE-NEXT:    pand %xmm3, %xmm0
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-SSE-NEXT:    por %xmm0, %xmm1
+; CHECK-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-SSE-NEXT:    pxor %xmm1, %xmm0
+; CHECK-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-SSE-NEXT:    retq
 ;
-; CHECK-SSE41-LABEL: t3_wide:
-; CHECK-SSE41:       # %bb.0:
-; CHECK-SSE41-NEXT:    movq %xmm0, %rcx
-; CHECK-SSE41-NEXT:    movabsq $-6148914691236517205, %rdx # imm = 0xAAAAAAAAAAAAAAAB
-; CHECK-SSE41-NEXT:    movq %rcx, %rax
-; CHECK-SSE41-NEXT:    mulq %rdx
-; CHECK-SSE41-NEXT:    shrq %rdx
-; CHECK-SSE41-NEXT:    leaq (%rdx,%rdx,2), %rax
-; CHECK-SSE41-NEXT:    subq %rax, %rcx
-; CHECK-SSE41-NEXT:    movq %rcx, %xmm0
-; CHECK-SSE41-NEXT:    pcmpeqq {{.*}}(%rip), %xmm0
-; CHECK-SSE41-NEXT:    retq
+; CHECK-AVX1-LABEL: t3_wide:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
+; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
+; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm3
+; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
+; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-AVX1-NEXT:    retq
 ;
-; CHECK-AVX-LABEL: t3_wide:
-; CHECK-AVX:       # %bb.0:
-; CHECK-AVX-NEXT:    vmovq %xmm0, %rcx
-; CHECK-AVX-NEXT:    movabsq $-6148914691236517205, %rdx # imm = 0xAAAAAAAAAAAAAAAB
-; CHECK-AVX-NEXT:    movq %rcx, %rax
-; CHECK-AVX-NEXT:    mulq %rdx
-; CHECK-AVX-NEXT:    shrq %rdx
-; CHECK-AVX-NEXT:    leaq (%rdx,%rdx,2), %rax
-; CHECK-AVX-NEXT:    subq %rax, %rcx
-; CHECK-AVX-NEXT:    vmovq %rcx, %xmm0
-; CHECK-AVX-NEXT:    vpcmpeqq {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-AVX-NEXT:    retq
+; CHECK-AVX2-LABEL: t3_wide:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
+; CHECK-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm3
+; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
+; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512VL-LABEL: t3_wide:
+; CHECK-AVX512VL:       # %bb.0:
+; CHECK-AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpsllq $32, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-AVX512VL-NEXT:    vpminuq {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-AVX512VL-NEXT:    retq
   %urem = urem <2 x i64> %X, <i64 3, i64 1>
   %cmp = icmp eq <2 x i64> %urem, <i64 0, i64 42>
   ret <2 x i1> %cmp