[X86] getAVX512Node() - find constant broadcasts to encourage load-folding
If an operand is a bitcasted or widened constant, try to more aggressively create broadcastable constants for folding, which in particular helps non-VLX modes. I've refactored getAVX512Node so that VLX targets can make better use of this as well.

NOTE: In the future, I think we should consider removing the broadcast of constant data from the DAG entirely and moving this to either X86InstrInfo::foldMemoryOperand or a new pass - AVX1/2 targets have similar problems with missed (whole vector) folds that need to be improved as well.

Differential Revision: https://reviews.llvm.org/D113845
parent 036207d5f2
commit ea9e6aa423
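As a rough, standalone illustration of the idea above (not code from this patch; the helper name getSplat64 and the use of plain std::vector are invented for the example): a vector constant whose bit pattern repeats with a 32/64-bit period can be rebuilt as a single scalar splat, which the backend can then fold as an embedded broadcast operand ({1to4}/{1to8}) instead of loading a full-width constant-pool entry.

// Illustration only: detect whether a byte-level constant is also a 64-bit
// splat, which is the property that makes it a candidate for an embedded
// broadcast.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

static std::optional<uint64_t> getSplat64(const std::vector<uint8_t> &Bytes) {
  if (Bytes.empty() || Bytes.size() % 8 != 0)
    return std::nullopt;
  auto Load64 = [&](size_t Ofs) {
    uint64_t V = 0;
    for (unsigned i = 0; i != 8; ++i)
      V |= uint64_t(Bytes[Ofs + i]) << (8 * i);
    return V;
  };
  uint64_t First = Load64(0);
  for (size_t Ofs = 8; Ofs != Bytes.size(); Ofs += 8)
    if (Load64(Ofs) != First)
      return std::nullopt;
  return First;
}

int main() {
  // A <32 x i8> splat of 0xF0, as used by the rotate/funnel-shift masks in
  // the updated tests: every 64-bit chunk is 0xF0F0F0F0F0F0F0F0, so a single
  // scalar broadcast covers the whole 256-bit constant.
  std::vector<uint8_t> Mask(32, 0xF0);
  if (auto Splat = getSplat64(Mask))
    std::printf("broadcastable as 0x%016llx\n", (unsigned long long)*Splat);
  return 0;
}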
@@ -6400,29 +6400,63 @@ static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT,
                             ArrayRef<SDValue> Ops, SelectionDAG &DAG,
                             const X86Subtarget &Subtarget) {
  assert(Subtarget.hasAVX512() && "AVX512 target expected");

  // If we have VLX or the type is already 512-bits, then create the node
  // directly.
  if (Subtarget.hasVLX() || VT.is512BitVector())
    return DAG.getNode(Opcode, DL, VT, Ops);

  // Widen the vector ops.
  MVT SVT = VT.getScalarType();
  MVT WideVT = MVT::getVectorVT(SVT, 512 / SVT.getSizeInBits());
  SmallVector<SDValue> WideOps(Ops.begin(), Ops.end());
  for (SDValue &Op : WideOps) {

  // If we have a 32/64 splatted constant, splat it to DstTy to
  // encourage a foldable broadcast'd operand.
  auto MakeBroadcastOp = [&](SDValue Op, MVT OpVT, MVT DstVT) {
    unsigned OpEltSizeInBits = OpVT.getScalarSizeInBits();
    // AVX512 broadcasts 32/64-bit operands.
    // TODO: Support float once getAVX512Node is used by fp-ops.
    if (!OpVT.isInteger() || OpEltSizeInBits < 32 ||
        !DAG.getTargetLoweringInfo().isTypeLegal(SVT))
      return SDValue();
    // If we're not widening, don't bother if we're not bitcasting.
    if (OpVT == DstVT && Op.getOpcode() != ISD::BITCAST)
      return SDValue();
    if (auto *BV = dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Op))) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                              HasAnyUndefs, OpEltSizeInBits) &&
          !HasAnyUndefs && SplatValue.getBitWidth() == OpEltSizeInBits)
        return DAG.getConstant(SplatValue, DL, DstVT);
    }
    return SDValue();
  };

  bool Widen = !(Subtarget.hasVLX() || VT.is512BitVector());

  MVT DstVT = VT;
  if (Widen)
    DstVT = MVT::getVectorVT(SVT, 512 / SVT.getSizeInBits());

  // Canonicalize src operands.
  SmallVector<SDValue> SrcOps(Ops.begin(), Ops.end());
  for (SDValue &Op : SrcOps) {
    MVT OpVT = Op.getSimpleValueType();
    // Just pass through scalar operands.
    if (!OpVT.isVector())
      continue;
    assert(OpVT.getSizeInBits() == VT.getSizeInBits() &&
           "Vector size mismatch");
    assert(OpVT == VT && "Vector type mismatch");

    if (SDValue BroadcastOp = MakeBroadcastOp(Op, OpVT, DstVT)) {
      Op = BroadcastOp;
      continue;
    }

    // Just widen the subvector by inserting into an undef wide vector.
    if (Widen)
      Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512);
  }

  SDValue Res = DAG.getNode(Opcode, DL, DstVT, SrcOps);

  // Perform the 512-bit op then extract the bottom subvector.
  SDValue Res = DAG.getNode(Opcode, DL, WideVT, WideOps);
  return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
  if (Widen)
    Res = extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
  return Res;
}

/// Insert i1-subvector to i1-vector.
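A small sketch of the type bookkeeping in the refactored getAVX512Node above (illustration only; VecTy and getDstTy are made-up names, and this deliberately ignores everything except the width calculation): with VLX or an already 512-bit type the node is emitted at the requested width, otherwise the op is built on the 512-bit widening of the type and the original width is extracted afterwards.

// Illustration only: pick the width the AVX512 node is actually built at.
#include <cassert>
#include <cstdio>

struct VecTy {
  unsigned EltBits, NumElts;
  unsigned bits() const { return EltBits * NumElts; }
};

static VecTy getDstTy(VecTy VT, bool HasVLX) {
  bool Widen = !(HasVLX || VT.bits() == 512);
  if (!Widen)
    return VT; // node created directly at the requested width
  return {VT.EltBits, 512 / VT.EltBits}; // e.g. v32i8 -> v64i8, extract low half after
}

int main() {
  VecTy V32i8{8, 32};
  VecTy Dst = getDstTy(V32i8, /*HasVLX=*/false);
  assert(Dst.bits() == 512);
  std::printf("v%ui%u -> v%ui%u, extract the low %u bits after the op\n",
              V32i8.NumElts, V32i8.EltBits, Dst.NumElts, Dst.EltBits,
              V32i8.bits());
  return 0;
}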
@@ -46204,7 +46238,8 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
  assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");

  MVT VT = N->getSimpleValueType(0);
  if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  if (!VT.isVector() || (EltSizeInBits % 8) != 0)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(N->getOperand(0));
@@ -46240,12 +46275,17 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,

  if (useVPTERNLOG(Subtarget, VT)) {
    // Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.
    SDValue A = DAG.getBitcast(VT, N0.getOperand(1));
    SDValue B = DAG.getBitcast(VT, N0.getOperand(0));
    SDValue C = DAG.getBitcast(VT, N1.getOperand(0));
    // VPTERNLOG is only available as vXi32/64-bit types.
    MVT OpSVT = EltSizeInBits == 32 ? MVT::i32 : MVT::i64;
    MVT OpVT =
        MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits());
    SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1));
    SDValue B = DAG.getBitcast(OpVT, N0.getOperand(0));
    SDValue C = DAG.getBitcast(OpVT, N1.getOperand(0));
    SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
    return getAVX512Node(X86ISD::VPTERNLOG, DL, VT, {A, B, C, Imm}, DAG,
                         Subtarget);
    SDValue Res = getAVX512Node(X86ISD::VPTERNLOG, DL, OpVT, {A, B, C, Imm},
                                DAG, Subtarget);
    return DAG.getBitcast(VT, Res);
  }

  SDValue X = N->getOperand(0);
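The 0xCA immediate mentioned in the hunk above is just the truth table of the bitwise select. As a self-contained check (illustration only; TernLogImm is not an LLVM or intrinsic API): VPTERNLOG's imm8 encodes the output for each combination of the three inputs, where input A contributes the bit pattern 0xF0, B contributes 0xCC and C contributes 0xAA, so A ? B : C evaluates to (A & B) | (~A & C) = 0xCA.

// Illustration only: derive the VPTERNLOG immediate for the select A ? B : C.
#include <cassert>
#include <cstdint>

constexpr uint8_t TernLogImm(uint8_t A, uint8_t B, uint8_t C) {
  // Evaluate the boolean function over the three truth-table input patterns.
  return uint8_t((A & B) | (uint8_t(~A) & C));
}

int main() {
  constexpr uint8_t A = 0xF0, B = 0xCC, C = 0xAA;
  static_assert(TernLogImm(A, B, C) == 0xCA, "imm code for A?B:C");
  assert(TernLogImm(A, B, C) == 0xCA);
  return 0;
}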
@@ -370,8 +370,7 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcopysignv8f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; CHECK-NEXT: retq
  %a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y)
  ret <8 x half> %a
@@ -412,8 +411,7 @@ declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) {
; CHECK-LABEL: fcopysignv16f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; CHECK-NEXT: retq
  %a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y)
  ret <16 x half> %a
@@ -454,8 +452,7 @@ declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: fcopysignv32f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y)
  ret <32 x half> %a
@@ -1666,12 +1666,12 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind "min-leg
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm3
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
; CHECK-NEXT: vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2
; CHECK-NEXT: vpsrlw $6, %ymm0, %ymm3
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsrlw $7, %ymm0, %ymm2
@@ -1784,7 +1784,7 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind "min-legal-v
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; CHECK-NEXT: retq
  %shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %lshr = lshr <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -1797,7 +1797,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-le
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@ -2855,10 +2855,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
|
@ -2872,20 +2871,18 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
|
|||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: vzeroupper
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
|
|
|
@ -2521,10 +2521,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
|
@ -2532,24 +2531,22 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
|
|||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
|
@ -2557,14 +2554,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
|
|||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
|
||||
|
|
|
@ -1412,7 +1412,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
|
|||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
|
||||
|
@ -1425,35 +1425,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
|
|||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
|
||||
ret <64 x i8> %res
|
||||
|
|
|
@ -2086,10 +2086,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
|
@ -2103,10 +2102,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
|
@ -2120,10 +2118,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: vzeroupper
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
|
|
|
@ -453,18 +453,16 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
|
|||
;
|
||||
; AVX512F-LABEL: var_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
|
||||
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
|
||||
; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
|
||||
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3
|
||||
|
@ -477,12 +475,12 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
|
|||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
|
||||
|
@ -1754,10 +1752,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
|
@ -1765,15 +1762,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
|
@ -1781,15 +1777,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
|
@ -1797,7 +1792,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
|
||||
|
|
|
@ -1039,7 +1039,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
|
|||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
|
||||
|
@ -1051,35 +1051,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
|
|||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
|
||||
ret <64 x i8> %res
|
||||
|
|
|
@ -2574,10 +2574,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
|
@ -2591,20 +2590,18 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
|
|||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: vzeroupper
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
|
|
|
@ -2199,10 +2199,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
|
@ -2210,24 +2209,22 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
|
|||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
|
@ -2235,14 +2232,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
|
|||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
|
||||
|
|
|
@ -1394,7 +1394,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
|
|||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
|
||||
|
@ -1407,35 +1407,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
|
|||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
|
||||
ret <64 x i8> %res
|
||||
|
|
|
@ -2174,10 +2174,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
|
@ -2191,10 +2190,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
|
@ -2208,10 +2206,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: vzeroupper
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
|
|
|
@ -490,20 +490,18 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
|
|||
;
|
||||
; AVX512F-LABEL: var_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpsubb %ymm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
|
||||
; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
|
||||
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3
|
||||
|
@ -516,14 +514,14 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
|
|||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1
|
||||
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
|
||||
|
@ -1851,10 +1849,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
;
|
||||
; AVX512F-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
|
@ -1862,15 +1859,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1
|
||||
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
|
@ -1878,15 +1874,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
|
@ -1894,7 +1889,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
|
|||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
|
||||
|
|
|
@ -1063,7 +1063,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
|
|||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
|
||||
|
@ -1075,35 +1075,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
|
|||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
|
||||
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
|
||||
ret <64 x i8> %res
|
||||
|
|
|
@ -2056,10 +2056,9 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
|
|||
; AVX512F-LABEL: splatconstant_rotate_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1
|
||||
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
|
@ -2073,10 +2072,9 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
|
|||
; AVX512BW-LABEL: splatconstant_rotate_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
|
@ -2090,10 +2088,9 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
|
|||
; AVX512VBMI2-LABEL: splatconstant_rotate_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1
|
||||
; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512VBMI2-NEXT: vzeroupper
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -449,18 +449,16 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
|||
;
|
||||
; AVX512F-LABEL: var_rotate_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
|
||||
; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
|
||||
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
|
||||
; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3
|
||||
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3
|
||||
|
@ -473,12 +471,12 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
|||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3
|
||||
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
|
||||
|
@ -1743,10 +1741,9 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
;
; AVX512F-LABEL: splatconstant_rotate_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
@ -1754,39 +1751,37 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512BW-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_v32i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VBMI2-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1
; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_v32i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_v32i8:
@ -2101,10 +2096,9 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind {
;
; AVX512F-LABEL: splatconstant_rotate_mask_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: retq
;
@ -2112,7 +2106,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
@ -992,7 +992,7 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_v64i8:
@ -1004,35 +1004,35 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>