[X86] Remove AVX2 and SSE2 pslldq and psrldq intrinsics. We can represent them in IR with vector shuffles now. All their uses have been removed from clang in favor of shuffles.

llvm-svn: 229640
This commit is contained in:
Craig Topper 2015-02-18 06:24:44 +00:00
parent 4486d61c03
commit b324e43aed
10 changed files with 171 additions and 160 deletions

View File

@ -453,13 +453,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psll_dq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_dq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
}
// Conversion ops
@ -1580,13 +1573,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psll_dq :
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx2_psrl_dq :
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;

View File

@ -163,6 +163,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx.vbroadcast.ss" ||
Name == "x86.avx.vbroadcast.ss.256" ||
Name == "x86.avx.vbroadcast.sd.256" ||
Name == "x86.sse2.psll.dq" ||
Name == "x86.sse2.psrl.dq" ||
Name == "x86.avx2.psll.dq" ||
Name == "x86.avx2.psrl.dq" ||
Name == "x86.sse2.psll.dq.bs" ||
Name == "x86.sse2.psrl.dq.bs" ||
Name == "x86.avx2.psll.dq.bs" ||
@ -372,6 +376,80 @@ static MetadataAsValue *getExpression(Value *VarOperand, Function *F) {
return MetadataAsValue::get(F->getContext(), Expr);
}
// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
Value *Op, unsigned NumLanes,
unsigned Shift) {
// Each lane is 16 bytes.
unsigned NumElts = NumLanes * 16;
// Bitcast from a 64-bit element type to a byte element type.
Op = Builder.CreateBitCast(Op,
VectorType::get(Type::getInt8Ty(C), NumElts),
"cast");
// We'll be shuffling in zeroes.
Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
SmallVector<Constant*, 32> Idxs;
// 256-bit version is split into two 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = NumElts + i - Shift;
if (Idx < NumElts)
Idx -= NumElts - 16; // end of lane, switch operand.
Idxs.push_back(Builder.getInt32(Idx + l));
}
Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res,
VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
"cast");
}
// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
Value *Op, unsigned NumLanes,
unsigned Shift) {
// Each lane is 16 bytes.
unsigned NumElts = NumLanes * 16;
// Bitcast from a 64-bit element type to a byte element type.
Op = Builder.CreateBitCast(Op,
VectorType::get(Type::getInt8Ty(C), NumElts),
"cast");
// We'll be shuffling in zeroes.
Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
SmallVector<Constant*, 32> Idxs;
// 256-bit version is split into two 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = i + Shift;
if (Idx >= 16)
Idx += NumElts - 16; // end of lane, switch operand.
Idxs.push_back(Builder.getInt32(Idx + l));
}
Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res,
VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
"cast");
}
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
@ -491,89 +569,46 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
} else if (Name == "llvm.x86.sse2.psll.dq") {
// 128-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
Shift / 8); // Shift is in bits.
} else if (Name == "llvm.x86.sse2.psrl.dq") {
// 128-bit shift right specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
Shift / 8); // Shift is in bits.
} else if (Name == "llvm.x86.avx2.psll.dq") {
// 256-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
Shift / 8); // Shift is in bits.
} else if (Name == "llvm.x86.avx2.psrl.dq") {
// 256-bit shift right specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
Shift / 8); // Shift is in bits.
} else if (Name == "llvm.x86.sse2.psll.dq.bs") {
Value *Op0 = ConstantVector::getSplat(16, Builder.getInt8(0));
Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0),
VectorType::get(Type::getInt8Ty(C),16),
"cast");
// 128-bit shift left specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
if (Shift < 16) {
SmallVector<Constant*, 16> Idxs;
for (unsigned i = 16; i != 32; ++i)
Idxs.push_back(Builder.getInt32(i - Shift));
Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
}
Rep = Builder.CreateBitCast(Op0,
VectorType::get(Type::getInt64Ty(C), 2),
"cast");
Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
Shift);
} else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0),
VectorType::get(Type::getInt8Ty(C),16),
"cast");
Value *Op1 = ConstantVector::getSplat(16, Builder.getInt8(0));
// 128-bit shift right specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
if (Shift < 16) {
SmallVector<Constant*, 16> Idxs;
for (unsigned i = 0; i != 16; ++i)
Idxs.push_back(Builder.getInt32(i + Shift));
Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
}
Rep = Builder.CreateBitCast(Op1,
VectorType::get(Type::getInt64Ty(C), 2),
"cast");
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
Shift);
} else if (Name == "llvm.x86.avx2.psll.dq.bs") {
Value *Op0 = ConstantVector::getSplat(32, Builder.getInt8(0));
Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0),
VectorType::get(Type::getInt8Ty(C),32),
"cast");
// 256-bit shift left specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
if (Shift < 16) {
SmallVector<Constant*, 32> Idxs;
for (unsigned l = 0; l != 32; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = 32 + i - Shift;
if (Idx < 32) Idx -= 16; // end of lane, switch operand.
Idxs.push_back(Builder.getInt32(Idx + l));
}
Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
}
Rep = Builder.CreateBitCast(Op1,
VectorType::get(Type::getInt64Ty(C), 4),
"cast");
Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
Shift);
} else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0),
VectorType::get(Type::getInt8Ty(C),32),
"cast");
Value *Op1 = ConstantVector::getSplat(32, Builder.getInt8(0));
// 256-bit shift right specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
if (Shift < 16) {
SmallVector<Constant*, 32> Idxs;
for (unsigned l = 0; l != 32; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = i + Shift;
if (Idx >= 16) Idx += 16; // end of lane, switch operand.
Idxs.push_back(Builder.getInt32(Idx + l));
}
Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
}
Rep = Builder.CreateBitCast(Op0,
VectorType::get(Type::getInt64Ty(C), 4),
"cast");
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
Shift);
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")

View File

@ -4283,26 +4283,11 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
(VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
}
let Predicates = [HasAVX2] in {
def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
(VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
}
let Predicates = [UseSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
(PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
}

View File

@ -2270,7 +2270,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_avx2_pslli_w:
case llvm::Intrinsic::x86_avx2_pslli_d:
case llvm::Intrinsic::x86_avx2_pslli_q:
case llvm::Intrinsic::x86_avx2_psll_dq:
case llvm::Intrinsic::x86_avx2_psrl_w:
case llvm::Intrinsic::x86_avx2_psrl_d:
case llvm::Intrinsic::x86_avx2_psrl_q:
@ -2281,14 +2280,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_avx2_psrli_q:
case llvm::Intrinsic::x86_avx2_psrai_w:
case llvm::Intrinsic::x86_avx2_psrai_d:
case llvm::Intrinsic::x86_avx2_psrl_dq:
case llvm::Intrinsic::x86_sse2_psll_w:
case llvm::Intrinsic::x86_sse2_psll_d:
case llvm::Intrinsic::x86_sse2_psll_q:
case llvm::Intrinsic::x86_sse2_pslli_w:
case llvm::Intrinsic::x86_sse2_pslli_d:
case llvm::Intrinsic::x86_sse2_pslli_q:
case llvm::Intrinsic::x86_sse2_psll_dq:
case llvm::Intrinsic::x86_sse2_psrl_w:
case llvm::Intrinsic::x86_sse2_psrl_d:
case llvm::Intrinsic::x86_sse2_psrl_q:
@ -2299,7 +2296,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse2_psrli_q:
case llvm::Intrinsic::x86_sse2_psrai_w:
case llvm::Intrinsic::x86_sse2_psrai_d:
case llvm::Intrinsic::x86_sse2_psrl_dq:
case llvm::Intrinsic::x86_mmx_psll_w:
case llvm::Intrinsic::x86_mmx_psll_d:
case llvm::Intrinsic::x86_mmx_psll_q:
@ -2334,10 +2330,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Byte shifts are not implemented.
// case llvm::Intrinsic::x86_avx512_psll_dq_bs:
// case llvm::Intrinsic::x86_avx512_psrl_dq_bs:
// case llvm::Intrinsic::x86_avx2_psll_dq_bs:
// case llvm::Intrinsic::x86_avx2_psrl_dq_bs:
// case llvm::Intrinsic::x86_sse2_psll_dq_bs:
// case llvm::Intrinsic::x86_sse2_psrl_dq_bs:
case llvm::Intrinsic::x86_sse2_packsswb_128:
case llvm::Intrinsic::x86_sse2_packssdw_128:

View File

@ -24,3 +24,17 @@ define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone

View File

@ -457,14 +457,6 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@ -545,14 +537,6 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]

View File

@ -46,3 +46,19 @@ define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone

View File

@ -160,14 +160,6 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq
%res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
@ -248,14 +240,6 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq
%res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]

View File

@ -0,0 +1,31 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=pentium4 -mattr=sse2 | FileCheck %s
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone

View File

@ -410,14 +410,6 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psllq
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@ -498,14 +490,6 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psrlq
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]