forked from OSchip/llvm-project
[X86][AVX512] Add lowering of vXi32/vXi64 ISD::ROTL/ISD::ROTR
Add support for lowering to ISD::ROTL/ISD::ROTR, including rotate by immediate.
Differential Revision: https://reviews.llvm.org/D35463
llvm-svn: 308177
This commit is contained in:
parent
a2a814f925
commit
1cbe8c2ca5
|
@ -1335,6 +1335,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::CTTZ, VT, Custom);
|
||||
}
|
||||
|
||||
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
|
||||
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
|
||||
MVT::v8i64}) {
|
||||
setOperationAction(ISD::ROTL, VT, Custom);
|
||||
setOperationAction(ISD::ROTR, VT, Custom);
|
||||
}
|
||||
|
||||
// Need to promote to 64-bit even though we have 32-bit masked instructions
|
||||
// because the IR optimizers rearrange bitcasts around logic ops leaving
|
||||
// too many variations to handle if we don't promote them.
|
||||
|
@ -22665,10 +22672,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
|
|||
SDLoc DL(Op);
|
||||
SDValue R = Op.getOperand(0);
|
||||
SDValue Amt = Op.getOperand(1);
|
||||
unsigned Opcode = Op.getOpcode();
|
||||
unsigned EltSizeInBits = VT.getScalarSizeInBits();
|
||||
|
||||
if (Subtarget.hasAVX512()) {
|
||||
// Attempt to rotate by immediate.
|
||||
APInt UndefElts;
|
||||
SmallVector<APInt, 16> EltBits;
|
||||
if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) {
|
||||
if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) {
|
||||
return EltBits[0] == V;
|
||||
})) {
|
||||
unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
|
||||
uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits);
|
||||
return DAG.getNode(Op, DL, VT, R,
|
||||
DAG.getConstant(RotateAmt, DL, MVT::i8));
|
||||
}
|
||||
}
|
||||
|
||||
// Else, fall-back on VPROLV/VPRORV.
|
||||
return Op;
|
||||
}
|
||||
|
||||
assert(VT.isVector() && "Custom lowering only for vector rotates!");
|
||||
assert(Subtarget.hasXOP() && "XOP support required for vector rotates!");
|
||||
assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
|
||||
assert((Opcode == ISD::ROTL) && "Only ROTL supported");
|
||||
|
||||
// XOP has 128-bit vector variable + immediate rotates.
|
||||
// +ve/-ve Amt = rotate left/right.
|
||||
|
@ -22683,7 +22711,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
|
|||
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
|
||||
if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
|
||||
uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
|
||||
assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range");
|
||||
assert(RotateAmt < EltSizeInBits && "Rotation out of range");
|
||||
return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
|
||||
DAG.getConstant(RotateAmt, DL, MVT::i8));
|
||||
}
|
||||
|
@ -24030,7 +24058,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
|
||||
case ISD::UMUL_LOHI:
|
||||
case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
|
||||
case ISD::ROTL: return LowerRotate(Op, Subtarget, DAG);
|
||||
case ISD::ROTL:
|
||||
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
|
||||
case ISD::SRA:
|
||||
case ISD::SRL:
|
||||
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
|
||||
|
|
|
@ -5676,6 +5676,109 @@ defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
|
|||
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
|
||||
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
|
||||
|
||||
|
||||
// Without VLX, implement v2i64/v4i64 and v4i32/v8i32 left rotates (variable and immediate) using the 512-bit VPROLV/VPROL instructions.
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPROLVQZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
|
||||
sub_xmm)>;
|
||||
def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPROLVQZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPROLVDZrr
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
|
||||
sub_xmm)>;
|
||||
def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPROLVDZrr
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPROLQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPROLQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
imm:$src2)), sub_ymm)>;
|
||||
|
||||
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPROLDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPROLDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
imm:$src2)), sub_ymm)>;
|
||||
}
|
||||
|
||||
// Without VLX, implement v2i64/v4i64 and v4i32/v8i32 right rotates (variable and immediate) using the 512-bit VPRORV/VPROR instructions.
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPRORVQZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
|
||||
sub_xmm)>;
|
||||
def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPRORVQZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPRORVDZrr
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
|
||||
sub_xmm)>;
|
||||
def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPRORVDZrr
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPRORQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPRORQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
imm:$src2)), sub_ymm)>;
|
||||
|
||||
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPRORDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPRORDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
imm:$src2)), sub_ymm)>;
|
||||
}
|
||||
|
||||
//===-------------------------------------------------------------------===//
|
||||
// 1-src variable permutation VPERMW/D/Q
|
||||
//===-------------------------------------------------------------------===//
|
||||
|
|
|
@ -7,29 +7,27 @@ declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16
|
|||
declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
; Tests showing failure to replace variable rotates with immediate splat versions.
|
||||
; Tests showing replacement of variable rotates with immediate splat versions.
|
||||
|
||||
define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
|
||||
; KNL-LABEL: test_splat_rol_v16i32:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
|
||||
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm3
|
||||
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vprold $5, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprold $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vprold $5, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: vpaddd %zmm3, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_rol_v16i32:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
|
||||
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm3
|
||||
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vprold $5, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprold $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vprold $5, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: vpaddd %zmm3, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
|
||||
|
@ -43,23 +41,21 @@ define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
|
|||
; KNL-LABEL: test_splat_rol_v8i64:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
|
||||
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm3
|
||||
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vprolq $5, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprolq $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vprolq $5, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: vpaddq %zmm3, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_rol_v8i64:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
|
||||
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm3
|
||||
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vprolq $5, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprolq $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vprolq $5, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: vpaddq %zmm3, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
|
||||
|
@ -73,23 +69,21 @@ define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2
|
|||
; KNL-LABEL: test_splat_ror_v16i32:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
|
||||
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm3
|
||||
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vprord $5, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprord $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vprord $5, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: vpaddd %zmm3, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_ror_v16i32:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
|
||||
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm3
|
||||
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vprord $5, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprord $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vprord $5, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: vpaddd %zmm3, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
|
||||
|
@ -103,23 +97,21 @@ define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
|
|||
; KNL-LABEL: test_splat_ror_v8i64:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
|
||||
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm3
|
||||
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vprorq $5, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprorq $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vprorq $5, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: vpaddq %zmm3, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_ror_v8i64:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
|
||||
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm3
|
||||
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vprorq $5, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprorq $5, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vprorq $5, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: vpaddq %zmm3, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
|
||||
|
@ -129,28 +121,26 @@ define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
|
|||
ret <8 x i64> %res4
|
||||
}
|
||||
|
||||
; Tests showing failure to replace out-of-bounds variable rotates with in-bounds immediate splat versions.
|
||||
; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.
|
||||
|
||||
define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
|
||||
; KNL-LABEL: test_splat_bounds_rol_v16i32:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vprold $1, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprold $31, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; KNL-NEXT: vprold $30, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_bounds_rol_v16i32:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vprold $1, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprold $31, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; SKX-NEXT: vprold $30, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
|
||||
|
@ -165,22 +155,20 @@ define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x
|
|||
; KNL-LABEL: test_splat_bounds_rol_v8i64:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vprolq $62, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprolq $1, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm0
|
||||
; KNL-NEXT: vprolq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_bounds_rol_v8i64:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vprolq $62, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprolq $1, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vprolq $63, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
|
||||
|
@ -195,22 +183,20 @@ define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1,
|
|||
; KNL-LABEL: test_splat_bounds_ror_v16i32:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vprord $1, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprord $31, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; KNL-NEXT: vprord $30, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_bounds_ror_v16i32:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vprord $1, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprord $31, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; SKX-NEXT: vprord $30, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
|
||||
|
@ -225,22 +211,20 @@ define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x
|
|||
; KNL-LABEL: test_splat_bounds_ror_v8i64:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vprorq $62, %zmm0, %zmm1 {%k1}
|
||||
; KNL-NEXT: vprorq $1, %zmm0, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm0
|
||||
; KNL-NEXT: vprorq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_splat_bounds_ror_v8i64:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: kmovd %edi, %k1
|
||||
; SKX-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vprorq $62, %zmm0, %zmm1 {%k1}
|
||||
; SKX-NEXT: vprorq $1, %zmm0, %zmm2 {%k1} {z}
|
||||
; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vprorq $63, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
|
||||
|
|
|
@ -11,12 +11,7 @@ define <4 x i32> @combine_vec_rot_rot(<4 x i32> %x) {
|
|||
;
|
||||
; AVX512-LABEL: combine_vec_rot_rot:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vprolvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
|
||||
%2 = shl <4 x i32> %x, <i32 31, i32 30, i32 29, i32 28>
|
||||
|
@ -35,12 +30,7 @@ define <4 x i32> @combine_vec_rot_rot_splat(<4 x i32> %x) {
|
|||
;
|
||||
; AVX512-LABEL: combine_vec_rot_rot_splat:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsrld $3, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpslld $29, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpsrld $22, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpslld $10, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vprold $7, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
|
||||
%2 = shl <4 x i32> %x, <i32 29, i32 29, i32 29, i32 29>
|
||||
|
@ -58,12 +48,6 @@ define <4 x i32> @combine_vec_rot_rot_splat_zero(<4 x i32> %x) {
|
|||
;
|
||||
; AVX512-LABEL: combine_vec_rot_rot_splat_zero:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsrld $1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpsrld $31, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
|
||||
%2 = shl <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
|
||||
|
|
|
@ -77,14 +77,19 @@ define <2 x i64> @var_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
|
|||
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: var_rotate_v2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
|
||||
; AVX512-NEXT: vpsubq %xmm1, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: var_rotate_v2i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_rotate_v2i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvq %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: var_rotate_v2i64:
|
||||
; XOP: # BB#0:
|
||||
|
@ -214,14 +219,19 @@ define <4 x i32> @var_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
|||
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: var_rotate_v4i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [32,32,32,32]
|
||||
; AVX512-NEXT: vpsubd %xmm1, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrlvd %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: var_rotate_v4i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_rotate_v4i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvd %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: var_rotate_v4i32:
|
||||
; XOP: # BB#0:
|
||||
|
@ -844,12 +854,19 @@ define <2 x i64> @constant_rotate_v2i64(<2 x i64> %a) nounwind {
|
|||
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_rotate_v2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: constant_rotate_v2i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4,14]
|
||||
; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_rotate_v2i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: constant_rotate_v2i64:
|
||||
; XOP: # BB#0:
|
||||
|
@ -940,12 +957,19 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind {
|
|||
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_rotate_v4i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: constant_rotate_v4i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
|
||||
; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_rotate_v4i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: constant_rotate_v4i32:
|
||||
; XOP: # BB#0:
|
||||
|
@ -1343,12 +1367,18 @@ define <2 x i64> @splatconstant_rotate_v2i64(<2 x i64> %a) nounwind {
|
|||
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_v2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllq $14, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrlq $50, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_v2i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolq $14, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_v2i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolq $14, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_rotate_v2i64:
|
||||
; XOP: # BB#0:
|
||||
|
@ -1384,12 +1414,18 @@ define <4 x i32> @splatconstant_rotate_v4i32(<4 x i32> %a) nounwind {
|
|||
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_v4i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpslld $4, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrld $28, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_v4i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_v4i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprold $4, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_rotate_v4i32:
|
||||
; XOP: # BB#0:
|
||||
|
@ -1516,11 +1552,19 @@ define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
|
|||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsrlq $49, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolq $15, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolq $15, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
; XOP: # BB#0:
|
||||
|
@ -1567,14 +1611,19 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
|
|||
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v4i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpslld $4, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrld $28, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_mask_v4i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_mask_v4i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprold $4, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_rotate_mask_v4i32:
|
||||
; XOP: # BB#0:
|
||||
|
|
|
@ -48,14 +48,18 @@ define <4 x i64> @var_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
|
|||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: var_rotate_v4i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm2 = [64,64,64,64]
|
||||
; AVX512-NEXT: vpsubq %ymm1, %ymm2, %ymm2
|
||||
; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrlvq %ymm2, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: var_rotate_v4i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_rotate_v4i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvq %ymm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: var_rotate_v4i64:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
@ -135,14 +139,18 @@ define <8 x i32> @var_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
|
|||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: var_rotate_v8i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32]
|
||||
; AVX512-NEXT: vpsubd %ymm1, %ymm2, %ymm2
|
||||
; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: var_rotate_v8i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_rotate_v8i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvd %ymm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: var_rotate_v8i32:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
@ -483,12 +491,18 @@ define <4 x i64> @constant_rotate_v4i64(<4 x i64> %a) nounwind {
|
|||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_rotate_v4i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: constant_rotate_v4i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,14,50,60]
|
||||
; AVX512BW-NEXT: vprolvq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_rotate_v4i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_rotate_v4i64:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
@ -543,12 +557,18 @@ define <8 x i32> @constant_rotate_v8i32(<8 x i32> %a) nounwind {
|
|||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_rotate_v8i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: constant_rotate_v8i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,6,7,8,9,10,11]
|
||||
; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_rotate_v8i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_rotate_v8i32:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
@ -792,12 +812,17 @@ define <4 x i64> @splatconstant_rotate_v4i64(<4 x i64> %a) nounwind {
|
|||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_v4i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllq $14, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrlq $50, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_v4i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolq $14, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_v4i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolq $14, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_rotate_v4i64:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
@ -840,12 +865,17 @@ define <8 x i32> @splatconstant_rotate_v8i32(<8 x i32> %a) nounwind {
|
|||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_v8i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpslld $4, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrld $28, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_v8i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_v8i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprold $4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_rotate_v8i32:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
@ -994,11 +1024,18 @@ define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
|
|||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsrlq $49, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprolq $15, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprolq $15, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
@ -1049,14 +1086,18 @@ define <8 x i32> @splatconstant_rotate_mask_v8i32(<8 x i32> %a) nounwind {
|
|||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v8i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpslld $4, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrld $28, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512BW-LABEL: splatconstant_rotate_mask_v8i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_rotate_mask_v8i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vprold $4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_rotate_mask_v8i32:
|
||||
; XOPAVX1: # BB#0:
|
||||
|
|
|
@ -11,11 +11,7 @@
|
|||
define <8 x i64> @var_rotate_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
|
||||
; AVX512-LABEL: var_rotate_v8i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [64,64,64,64,64,64,64,64]
|
||||
; AVX512-NEXT: vpsubq %zmm1, %zmm2, %zmm2
|
||||
; AVX512-NEXT: vpsllvq %zmm1, %zmm0, %zmm1
|
||||
; AVX512-NEXT: vpsrlvq %zmm2, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vporq %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vprolvq %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%b64 = sub <8 x i64> <i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64>, %b
|
||||
%shl = shl <8 x i64> %a, %b
|
||||
|
@ -27,11 +23,7 @@ define <8 x i64> @var_rotate_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
|
|||
define <16 x i32> @var_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
|
||||
; AVX512-LABEL: var_rotate_v16i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512-NEXT: vpsubd %zmm1, %zmm2, %zmm2
|
||||
; AVX512-NEXT: vpsllvd %zmm1, %zmm0, %zmm1
|
||||
; AVX512-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpord %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vprolvd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%b32 = sub <16 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, %b
|
||||
%shl = shl <16 x i32> %a, %b
|
||||
|
@ -315,9 +307,7 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
|||
define <8 x i64> @constant_rotate_v8i64(<8 x i64> %a) nounwind {
|
||||
; AVX512-LABEL: constant_rotate_v8i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm1
|
||||
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512-NEXT: vporq %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vprolvq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%shl = shl <8 x i64> %a, <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>
|
||||
%lshr = lshr <8 x i64> %a, <i64 60, i64 50, i64 14, i64 4, i64 60, i64 50, i64 14, i64 4>
|
||||
|
@ -328,9 +318,7 @@ define <8 x i64> @constant_rotate_v8i64(<8 x i64> %a) nounwind {
|
|||
define <16 x i32> @constant_rotate_v16i32(<16 x i32> %a) nounwind {
|
||||
; AVX512-LABEL: constant_rotate_v16i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm1
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpord %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vprolvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%shl = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
|
||||
%lshr = lshr <16 x i32> %a, <i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21>
|
||||
|
@ -571,9 +559,7 @@ define <64 x i8> @constant_rotate_v64i8(<64 x i8> %a) nounwind {
|
|||
define <8 x i64> @splatconstant_rotate_v8i64(<8 x i64> %a) nounwind {
|
||||
; AVX512-LABEL: splatconstant_rotate_v8i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllq $14, %zmm0, %zmm1
|
||||
; AVX512-NEXT: vpsrlq $50, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vporq %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vprolq $14, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%shl = shl <8 x i64> %a, <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>
|
||||
%lshr = lshr <8 x i64> %a, <i64 50, i64 50, i64 50, i64 50, i64 50, i64 50, i64 50, i64 50>
|
||||
|
@ -584,9 +570,7 @@ define <8 x i64> @splatconstant_rotate_v8i64(<8 x i64> %a) nounwind {
|
|||
define <16 x i32> @splatconstant_rotate_v16i32(<16 x i32> %a) nounwind {
|
||||
; AVX512-LABEL: splatconstant_rotate_v16i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpslld $4, %zmm0, %zmm1
|
||||
; AVX512-NEXT: vpsrld $28, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpord %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vprold $4, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
||||
%lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
|
||||
|
@ -697,7 +681,7 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
|
|||
define <8 x i64> @splatconstant_rotate_mask_v8i64(<8 x i64> %a) nounwind {
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v8i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsrlq $49, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vprolq $15, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%shl = shl <8 x i64> %a, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
|
||||
|
@ -711,11 +695,8 @@ define <8 x i64> @splatconstant_rotate_mask_v8i64(<8 x i64> %a) nounwind {
|
|||
define <16 x i32> @splatconstant_rotate_mask_v16i32(<16 x i32> %a) nounwind {
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpslld $4, %zmm0, %zmm1
|
||||
; AVX512-NEXT: vpsrld $28, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vprold $4, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm1, %zmm1
|
||||
; AVX512-NEXT: vporq %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
||||
%lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
|
||||
|
|
Loading…
Reference in New Issue