forked from OSchip/llvm-project
Support arbitrary addrspace pointers in masked load/store intrinsics
This patch fixes a problem that occurs when loop-vectorize tries to use the @llvm.masked.load/store intrinsics with a pointer in a non-default addrspace. It fails with the "Calling a function with a bad signature!" assertion in the CallInst constructor, because it tries to pass a non-default addrspace pointer to a pointer parameter that has the default addrspace. The fix is to add the pointer type as another overloaded type of the @llvm.masked.load/store intrinsics. Reviewed By: reames Differential Revision: http://reviews.llvm.org/D17270 llvm-svn: 263158
This commit is contained in:
parent
b45bf0af91
commit
3c8fc57e16
|
@ -11342,12 +11342,12 @@ This is an overloaded intrinsic. The loaded data is a vector of any integer, flo
|
|||
|
||||
::
|
||||
|
||||
declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
|
||||
declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
|
||||
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
|
||||
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
|
||||
;; The data is a vector of pointers to double
|
||||
declare <8 x double*> @llvm.masked.load.v8p0f64 (<8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x double*> <passthru>)
|
||||
declare <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64 (<8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x double*> <passthru>)
|
||||
;; The data is a vector of function pointers
|
||||
declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f (<8 x i32 ()*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x i32 ()*> <passthru>)
|
||||
declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f (<8 x i32 ()*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x i32 ()*> <passthru>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
@ -11370,7 +11370,7 @@ The result of this operation is equivalent to a regular vector load instruction
|
|||
|
||||
::
|
||||
|
||||
%res = call <16 x float> @llvm.masked.load.v16f32 (<16 x float>* %ptr, i32 4, <16 x i1>%mask, <16 x float> %passthru)
|
||||
%res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* %ptr, i32 4, <16 x i1>%mask, <16 x float> %passthru)
|
||||
|
||||
;; The result of the two following instructions is identical aside from potential memory access exceptions
|
||||
%loadlal = load <16 x float>, <16 x float>* %ptr, align 4
|
||||
|
@ -11387,12 +11387,12 @@ This is an overloaded intrinsic. The data stored in memory is a vector of any in
|
|||
|
||||
::
|
||||
|
||||
declare void @llvm.masked.store.v8i32 (<8 x i32> <value>, <8 x i32>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
|
||||
declare void @llvm.masked.store.v16f32 (<16 x float> <value>, <16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
|
||||
declare void @llvm.masked.store.v8i32.p0v8i32 (<8 x i32> <value>, <8 x i32>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
|
||||
declare void @llvm.masked.store.v16f32.p0v16f32 (<16 x float> <value>, <16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
|
||||
;; The data is a vector of pointers to double
|
||||
declare void @llvm.masked.store.v8p0f64 (<8 x double*> <value>, <8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
|
||||
declare void @llvm.masked.store.v8p0f64.p0v8p0f64 (<8 x double*> <value>, <8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
|
||||
;; The data is a vector of function pointers
|
||||
declare void @llvm.masked.store.v4p0f_i32f (<4 x i32 ()*> <value>, <4 x i32 ()*>* <ptr>, i32 <alignment>, <4 x i1> <mask>)
|
||||
declare void @llvm.masked.store.v4p0f_i32f.p0v4p0f_i32f (<4 x i32 ()*> <value>, <4 x i32 ()*>* <ptr>, i32 <alignment>, <4 x i1> <mask>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
@ -11413,7 +11413,7 @@ The result of this operation is equivalent to a load-modify-store sequence. Howe
|
|||
|
||||
::
|
||||
|
||||
call void @llvm.masked.store.v16f32(<16 x float> %value, <16 x float>* %ptr, i32 4, <16 x i1> %mask)
|
||||
call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %value, <16 x float>* %ptr, i32 4, <16 x i1> %mask)
|
||||
|
||||
;; The result of the following instructions is identical aside from potential data races and memory access exceptions
|
||||
%oldval = load <16 x float>, <16 x float>* %ptr, align 4
|
||||
|
|
|
@ -520,9 +520,9 @@ public:
|
|||
|
||||
private:
|
||||
/// \brief Create a call to a masked intrinsic with given Id.
|
||||
/// Masked intrinsic has only one overloaded type - data type.
|
||||
CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef<Value *> Ops,
|
||||
Type *DataTy, const Twine &Name = "");
|
||||
ArrayRef<Type *> OverloadedTypes,
|
||||
const Twine &Name = "");
|
||||
|
||||
Value *getCastedInt8PtrValue(Value *Ptr);
|
||||
};
|
||||
|
|
|
@ -632,13 +632,14 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
|
|||
|
||||
//===-------------------------- Masked Intrinsics -------------------------===//
|
||||
//
|
||||
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
|
||||
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
|
||||
LLVMAnyPointerType<LLVMMatchType<0>>,
|
||||
llvm_i32_ty,
|
||||
LLVMVectorSameWidth<0, llvm_i1_ty>],
|
||||
[IntrReadWriteArgMem]>;
|
||||
|
||||
def int_masked_load : Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMPointerTo<0>, llvm_i32_ty,
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
|
||||
LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
|
||||
[IntrReadArgMem]>;
|
||||
|
||||
|
|
|
@ -145,6 +145,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
break;
|
||||
}
|
||||
|
||||
case 'm': {
|
||||
if (Name.startswith("masked.load.")) {
|
||||
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
|
||||
if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
|
||||
F->setName(Name + ".old");
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(),
|
||||
Intrinsic::masked_load,
|
||||
Tys);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (Name.startswith("masked.store.")) {
|
||||
auto Args = F->getFunctionType()->params();
|
||||
Type *Tys[] = { Args[0], Args[1] };
|
||||
if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
|
||||
F->setName(Name + ".old");
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(),
|
||||
Intrinsic::masked_store,
|
||||
Tys);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'o':
|
||||
// We only need to change the name to match the mangling including the
|
||||
// address space.
|
||||
|
@ -790,6 +815,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::masked_load:
|
||||
case Intrinsic::masked_store: {
|
||||
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
|
||||
CI->arg_operands().end());
|
||||
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -212,13 +212,15 @@ CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
|
|||
CallInst *IRBuilderBase::CreateMaskedLoad(Value *Ptr, unsigned Align,
|
||||
Value *Mask, Value *PassThru,
|
||||
const Twine &Name) {
|
||||
// DataTy is the overloaded type
|
||||
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
|
||||
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
|
||||
Type *DataTy = PtrTy->getElementType();
|
||||
assert(DataTy->isVectorTy() && "Ptr should point to a vector");
|
||||
if (!PassThru)
|
||||
PassThru = UndefValue::get(DataTy);
|
||||
Type *OverloadedTypes[] = { DataTy, PtrTy };
|
||||
Value *Ops[] = { Ptr, getInt32(Align), Mask, PassThru};
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, DataTy, Name);
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops,
|
||||
OverloadedTypes, Name);
|
||||
}
|
||||
|
||||
/// \brief Create a call to a Masked Store intrinsic.
|
||||
|
@ -229,19 +231,22 @@ CallInst *IRBuilderBase::CreateMaskedLoad(Value *Ptr, unsigned Align,
|
|||
/// be accessed in memory
|
||||
CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
|
||||
unsigned Align, Value *Mask) {
|
||||
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
|
||||
Type *DataTy = PtrTy->getElementType();
|
||||
assert(DataTy->isVectorTy() && "Ptr should point to a vector");
|
||||
Type *OverloadedTypes[] = { DataTy, PtrTy };
|
||||
Value *Ops[] = { Val, Ptr, getInt32(Align), Mask };
|
||||
// Type of the data to be stored - the only one overloaded type
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, Val->getType());
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);
|
||||
}
|
||||
|
||||
/// Create a call to a Masked intrinsic, with given intrinsic Id,
|
||||
/// an array of operands - Ops, and one overloaded type - DataTy
|
||||
/// an array of operands - Ops, and an array of overloaded types -
|
||||
/// OverloadedTypes.
|
||||
CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
|
||||
ArrayRef<Value *> Ops,
|
||||
Type *DataTy,
|
||||
ArrayRef<Type *> OverloadedTypes,
|
||||
const Twine &Name) {
|
||||
Module *M = BB->getParent()->getParent();
|
||||
Type *OverloadedTypes[] = { DataTy };
|
||||
Value *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes);
|
||||
return createCallHelper(TheFn, Ops, this, Name);
|
||||
}
|
||||
|
@ -270,7 +275,7 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align,
|
|||
|
||||
// We specify only one type when we create this intrinsic. Types of other
|
||||
// arguments are derived from this type.
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, DataTy, Name);
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name);
|
||||
}
|
||||
|
||||
/// \brief Create a call to a Masked Scatter intrinsic.
|
||||
|
@ -300,7 +305,7 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
|
|||
|
||||
// We specify only one type when we create this intrinsic. Types of other
|
||||
// arguments are derived from this type.
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, DataTy);
|
||||
return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy });
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3>
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
; AVX2: Found an estimated cost of 4 {{.*}}.masked
|
||||
define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
|
||||
%mask = icmp eq <2 x i64> %trigger, zeroinitializer
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@ define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double>
|
|||
; AVX2: Found an estimated cost of 4 {{.*}}.masked
|
||||
define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@ define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
|
|||
; AVX2: Found an estimated cost of 4 {{.*}}.masked
|
||||
define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
|
||||
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -31,7 +31,7 @@ define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
|
|||
; AVX2: Found an estimated cost of 4 {{.*}}.masked
|
||||
define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
|
||||
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
|
@ -39,7 +39,7 @@ define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %d
|
|||
; AVX2: Found an estimated cost of 5 {{.*}}.masked
|
||||
define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
|
||||
call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,7 @@ define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
|
|||
; AVX2: Found an estimated cost of 6 {{.*}}.masked
|
||||
define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
|
||||
call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
|
|||
; AVX2: Found an estimated cost of 5 {{.*}}.masked
|
||||
define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
|
||||
%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
|
||||
ret <2 x float> %res
|
||||
}
|
||||
|
||||
|
@ -63,7 +63,7 @@ define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %d
|
|||
; AVX2: Found an estimated cost of 6 {{.*}}.masked
|
||||
define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
|
||||
%res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
|
||||
ret <2 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -279,24 +279,22 @@ declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4
|
|||
declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
|
||||
declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
|
||||
|
||||
declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
|
||||
declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
|
||||
declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
|
||||
declare void @llvm.masked.store.v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
|
||||
declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
|
||||
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
|
||||
declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
|
||||
declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
|
||||
declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
|
||||
declare <4 x double> @llvm.masked.load.v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
|
||||
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
|
||||
declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
|
||||
|
||||
declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
|
||||
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
|
||||
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
|
||||
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
|
||||
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
|
||||
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
|
||||
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
|
||||
declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
|
||||
declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
|
||||
declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
|
||||
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
|
||||
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
|
||||
|
|
|
@ -54,7 +54,24 @@ entry:
|
|||
define i32 @test.objectsize() {
|
||||
; CHECK-LABEL: @test.objectsize(
|
||||
; CHECK: @llvm.objectsize.i32.p0i8
|
||||
; CHECK-DAG: declare i32 @llvm.objectsize.i32.p0i8
|
||||
%s = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false)
|
||||
ret i32 %s
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
|
||||
|
||||
define <2 x double> @tests.masked.load(<2 x double>* %ptr, <2 x i1> %mask, <2 x double> %passthru) {
|
||||
; CHECK-LABEL: @tests.masked.load(
|
||||
; CHECK: @llvm.masked.load.v2f64.p0v2f64
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> %mask, <2 x double> %passthru)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
|
||||
|
||||
define void @tests.masked.store(<2 x double>* %ptr, <2 x i1> %mask, <2 x double> %val) {
|
||||
; CHECK-LABEL: @tests.masked.store(
|
||||
; CHECK: @llvm.masked.store.v2f64.p0v2f64
|
||||
call void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptr, i32 3, <2 x i1> %mask)
|
||||
ret void
|
||||
}
|
|
@ -18,7 +18,7 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
|
|||
; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
|
||||
; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = call <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
|
||||
%res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
|
||||
ret <32 x double> %res
|
||||
}
|
||||
|
||||
|
@ -39,9 +39,9 @@ define <32 x i64> @test_load_32i64(<32 x i64>* %ptrs, <32 x i1> %mask, <32 x i64
|
|||
; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
|
||||
; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = call <32 x i64> @llvm.masked.load.v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
|
||||
%res = call <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
|
||||
ret <32 x i64> %res
|
||||
}
|
||||
|
||||
declare <32 x i64> @llvm.masked.load.v32i64(<32 x i64>* %ptrs, i32, <32 x i1> %mask, <32 x i64> %src0)
|
||||
declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
|
||||
declare <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32, <32 x i1> %mask, <32 x i64> %src0)
|
||||
declare <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
|
||||
|
|
|
@ -40,7 +40,7 @@ define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
|
|||
; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
|
||||
%res = call <16 x i32> @llvm.masked.load.v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>undef)
|
||||
%res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>undef)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -76,7 +76,7 @@ define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
|
|||
; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
|
||||
%res = call <16 x i32> @llvm.masked.load.v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>zeroinitializer)
|
||||
%res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>zeroinitializer)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -114,7 +114,7 @@ define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
|
|||
; AVX512-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v16i32(<16 x i32>%val, <16 x i32>* %addr, i32 4, <16 x i1>%mask)
|
||||
call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>%val, <16 x i32>* %addr, i32 4, <16 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -155,7 +155,7 @@ define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float
|
|||
; AVX512-NEXT: vmovaps %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
|
||||
%res = call <16 x float> @llvm.masked.load.v16f32(<16 x float>* %addr, i32 4, <16 x i1>%mask, <16 x float> %dst)
|
||||
%res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %addr, i32 4, <16 x i1>%mask, <16 x float> %dst)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
@ -210,7 +210,7 @@ define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double>
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
|
||||
%res = call <8 x double> @llvm.masked.load.v8f64(<8 x double>* %addr, i32 4, <8 x i1>%mask, <8 x double>%dst)
|
||||
%res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1>%mask, <8 x double>%dst)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
|
@ -239,7 +239,7 @@ define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double>
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i64> %trigger, zeroinitializer
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
|
@ -268,7 +268,7 @@ define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %d
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
|
@ -305,7 +305,7 @@ define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -338,7 +338,7 @@ define void @test9(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
|
|||
; SKX-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
|
||||
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -381,7 +381,7 @@ define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
@ -420,7 +420,7 @@ define <4 x double> @test10b(<4 x i32> %trigger, <4 x double>* %addr, <4 x doubl
|
|||
; SKX-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>zeroinitializer)
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>zeroinitializer)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
@ -462,7 +462,7 @@ define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float>
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
|
@ -507,7 +507,7 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
|
|||
; SKX-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1}
|
||||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
|
||||
%res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -548,7 +548,7 @@ define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) {
|
|||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 32, <8 x i1> %mask, <8 x float> zeroinitializer)
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1> %mask, <8 x float> zeroinitializer)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
|
@ -589,7 +589,7 @@ define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) {
|
|||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %addr, i32 4, <8 x i1> %mask, <8 x i32> zeroinitializer)
|
||||
%res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> %mask, <8 x i32> zeroinitializer)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -629,7 +629,7 @@ define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
|
|||
; SKX-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
|
||||
call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -667,7 +667,7 @@ define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val)
|
|||
; AVX512-NEXT: vmovups %zmm1, (%rdi) {%k1}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v16f32(<16 x float>%val, <16 x float>* %addr, i32 4, <16 x i1>%mask)
|
||||
call void @llvm.masked.store.v16f32.p0v16f32(<16 x float>%val, <16 x float>* %addr, i32 4, <16 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -712,7 +712,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
|
|||
; SKX-NEXT: vmovups %xmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
|
||||
call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -758,7 +758,7 @@ define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
|
|||
; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
|
||||
call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -807,7 +807,7 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
|
||||
%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
|
||||
ret <2 x float> %res
|
||||
}
|
||||
|
||||
|
@ -863,7 +863,7 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
|
|||
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
|
||||
%res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
|
||||
ret <2 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -908,7 +908,7 @@ define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
|
|||
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
|
||||
%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
|
||||
ret <2 x float> %res
|
||||
}
|
||||
|
||||
|
@ -931,7 +931,7 @@ define <4 x float> @load_all(<4 x i32> %trigger, <4 x float>* %addr) {
|
|||
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
|
@ -960,7 +960,7 @@ define <4 x float> @mload_constmask_v4f32(<4 x float>* %addr, <4 x float> %dst)
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x float> %dst)
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x float> %dst)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
|
@ -994,7 +994,7 @@ define <4 x i32> @mload_constmask_v4i32(<4 x i32>* %addr, <4 x i32> %dst) {
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i32> %dst)
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i32> %dst)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -1021,7 +1021,7 @@ define <8 x float> @mload_constmask_v8f32(<8 x float>* %addr, <8 x float> %dst)
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovups (%rdi), %ymm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %dst)
|
||||
%res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %dst)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
|
@ -1046,7 +1046,7 @@ define <4 x double> @mload_constmask_v4f64(<4 x double>* %addr, <4 x double> %ds
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> %dst)
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> %dst)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
@ -1080,7 +1080,7 @@ define <8 x i32> @mload_constmask_v8i32(<8 x i32>* %addr, <8 x i32> %dst) {
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i32> %dst)
|
||||
%res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i32> %dst)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -1112,7 +1112,7 @@ define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) {
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.masked.load.v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %dst)
|
||||
%res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %dst)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
|
@ -1135,7 +1135,7 @@ define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %ds
|
|||
; AVX512-NEXT: kmovw %eax, %k1
|
||||
; AVX512-NEXT: vmovupd (%rdi), %zmm0 {%k1}
|
||||
; AVX512-NEXT: retq
|
||||
%res = call <8 x double> @llvm.masked.load.v8f64(<8 x double>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %dst)
|
||||
%res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %dst)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
|
@ -1160,7 +1160,7 @@ define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> undef)
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> undef)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
@ -1189,7 +1189,7 @@ define <4 x i64> @mload_constmask_v4i64_undef_passthrough(<4 x i64>* %addr) {
|
|||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.masked.load.v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 0>, <4 x i64> undef)
|
||||
%res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 0>, <4 x i64> undef)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
|
@ -1218,7 +1218,7 @@ define void @test21(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
|
|||
; SKX-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
|
||||
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1234,7 +1234,7 @@ define void @one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
|
|||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovd %xmm0, (%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
call void @llvm.masked.store.v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
|
||||
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1250,7 +1250,7 @@ define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
|
|||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vextractps $2, %xmm0, 8(%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
call void @llvm.masked.store.v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
|
||||
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1269,7 +1269,7 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
|
|||
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX512-NEXT: vmovq %xmm0, 16(%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
call void @llvm.masked.store.v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
|
||||
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1288,7 +1288,7 @@ define void @one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
|
|||
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX512-NEXT: vmovhpd %xmm0, 24(%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
call void @llvm.masked.store.v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
|
||||
call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1307,7 +1307,7 @@ define void @one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
|
|||
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
|
||||
; AVX512-NEXT: vmovlpd %xmm0, 48(%rdi)
|
||||
; AVX512-NEXT: retq
|
||||
call void @llvm.masked.store.v8f64(<8 x double> %val, <8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false>)
|
||||
call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %val, <8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false>)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1323,7 +1323,7 @@ define <4 x i32> @load_one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
|
|||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vpinsrd $0, (%rdi), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>, <4 x i32> %val)
|
||||
%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>, <4 x i32> %val)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -1339,7 +1339,7 @@ define <4 x float> @load_one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val)
|
|||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vinsertps $32, 8(%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
||||
; AVX512-NEXT: retq
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x float> %val)
|
||||
%res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x float> %val)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
|
@ -1373,7 +1373,7 @@ define <4 x i64> @load_one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
|
|||
; SKX-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
|
||||
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.masked.load.v4i64(<4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
|
||||
%res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
|
@ -1400,7 +1400,7 @@ define <4 x double> @load_one_mask_bit_set4(<4 x double>* %addr, <4 x double> %v
|
|||
; SKX-NEXT: vmovhpd 24(%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0]
|
||||
; SKX-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
|
||||
%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
|
@ -1421,37 +1421,36 @@ define <8 x double> @load_one_mask_bit_set5(<8 x double>* %addr, <8 x double> %v
|
|||
; AVX512-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; AVX512-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%res = call <8 x double> @llvm.masked.load.v8f64(<8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x double> %val)
|
||||
%res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x double> %val)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
|
||||
declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
|
||||
declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
|
||||
declare <4 x i64> @llvm.masked.load.v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
|
||||
declare void @llvm.masked.store.v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
|
||||
declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
|
||||
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
|
||||
declare <8 x i32> @llvm.masked.load.v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
|
||||
declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
|
||||
declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
|
||||
declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
|
||||
declare <4 x double> @llvm.masked.load.v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
|
||||
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
|
||||
declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
|
||||
declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
|
||||
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
|
||||
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
|
||||
declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
|
||||
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
|
||||
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
|
||||
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
|
||||
declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
|
||||
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
|
||||
declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
|
||||
declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
|
||||
declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
|
||||
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
|
||||
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
|
||||
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
|
||||
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
|
||||
|
||||
declare <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
|
||||
declare <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
|
||||
|
||||
define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
|
||||
; AVX1-LABEL: test23:
|
||||
|
@ -1501,13 +1500,13 @@ define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
|
|||
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <16 x i32*> %trigger, zeroinitializer
|
||||
%res = call <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
|
||||
%res = call <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
|
||||
ret <16 x i32*> %res
|
||||
}
|
||||
|
||||
%mystruct = type { i16, i16, [1 x i8*] }
|
||||
|
||||
declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
|
||||
declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
|
||||
|
||||
define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
|
||||
; AVX1-LABEL: test24:
|
||||
|
@ -1596,7 +1595,7 @@ define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
|
|||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
|
||||
%res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
|
||||
ret <16 x %mystruct*> %res
|
||||
}
|
||||
|
||||
|
@ -1687,10 +1686,10 @@ define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %sr
|
|||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovdqu64 %zmm2, 64(%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32 4, <16 x i1> %mask)
|
||||
call void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32 4, <16 x i1> %mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32, <16 x i1> %mask)
|
||||
declare void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32, <16 x i1> %mask)
|
||||
|
||||
define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
|
||||
; AVX1-LABEL: test_store_16f64:
|
||||
|
@ -1779,10 +1778,10 @@ define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x doubl
|
|||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovupd %zmm2, 64(%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32 4, <16 x i1> %mask)
|
||||
call void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32 4, <16 x i1> %mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32, <16 x i1> %mask)
|
||||
declare void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32, <16 x i1> %mask)
|
||||
|
||||
define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
|
||||
; AVX1-LABEL: test_load_16i64:
|
||||
|
@ -1883,10 +1882,10 @@ define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: vmovaps %zmm2, %zmm1
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i64> @llvm.masked.load.v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
|
||||
%res = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
|
||||
ret <16 x i64> %res
|
||||
}
|
||||
declare <16 x i64> @llvm.masked.load.v16i64(<16 x i64>* %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
|
||||
declare <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
|
||||
|
||||
define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
|
||||
; AVX1-LABEL: test_load_16f64:
|
||||
|
@ -1987,10 +1986,10 @@ define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16
|
|||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: vmovaps %zmm2, %zmm1
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x double> @llvm.masked.load.v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
|
||||
%res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
|
||||
ret <16 x double> %res
|
||||
}
|
||||
declare <16 x double> @llvm.masked.load.v16f64(<16 x double>* %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
|
||||
declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
|
||||
|
||||
define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) {
|
||||
; AVX1-LABEL: test_load_32f64:
|
||||
|
@ -2218,10 +2217,11 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
|
|||
; SKX-NEXT: vmovaps %zmm3, %zmm2
|
||||
; SKX-NEXT: vmovaps %zmm4, %zmm3
|
||||
; SKX-NEXT: retq
|
||||
%res = call <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
|
||||
%res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
|
||||
ret <32 x double> %res
|
||||
}
|
||||
declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
|
||||
|
||||
declare <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
|
||||
|
||||
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
|
||||
; SKX-LABEL: test_mask_load_16xi8:
|
||||
|
@ -2230,10 +2230,10 @@ define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x
|
|||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
|
||||
%res = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
|
||||
|
||||
define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
|
||||
; SKX-LABEL: test_mask_load_32xi8:
|
||||
|
@ -2242,10 +2242,10 @@ define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x
|
|||
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||
; SKX-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
|
||||
%res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
|
||||
declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
|
||||
|
||||
define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
|
||||
; SKX-LABEL: test_mask_load_64xi8:
|
||||
|
@ -2255,10 +2255,10 @@ define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x
|
|||
; SKX-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1}
|
||||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.masked.load.v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
|
||||
%res = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
declare <64 x i8> @llvm.masked.load.v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
|
||||
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
|
||||
|
||||
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
|
||||
; SKX-LABEL: test_mask_load_8xi16:
|
||||
|
@ -2267,10 +2267,10 @@ define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i1
|
|||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
|
||||
%res = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
|
||||
|
||||
define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
|
||||
; SKX-LABEL: test_mask_load_16xi16:
|
||||
|
@ -2279,10 +2279,10 @@ define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16
|
|||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
|
||||
%res = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
|
||||
declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
|
||||
|
||||
define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
|
||||
; SKX-LABEL: test_mask_load_32xi16:
|
||||
|
@ -2292,10 +2292,10 @@ define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32
|
|||
; SKX-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1}
|
||||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%res = call <32 x i16> @llvm.masked.load.v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
|
||||
%res = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
declare <32 x i16> @llvm.masked.load.v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
|
||||
declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
|
||||
|
||||
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
|
||||
; SKX-LABEL: test_mask_store_16xi8:
|
||||
|
@ -2304,10 +2304,10 @@ define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8>
|
|||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu8 %xmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
|
||||
call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
|
||||
|
||||
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
|
||||
; SKX-LABEL: test_mask_store_32xi8:
|
||||
|
@ -2316,10 +2316,10 @@ define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8>
|
|||
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||
; SKX-NEXT: vmovdqu8 %ymm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
|
||||
call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
|
||||
declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
|
||||
|
||||
define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
|
||||
; SKX-LABEL: test_mask_store_64xi8:
|
||||
|
@ -2328,10 +2328,10 @@ define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8>
|
|||
; SKX-NEXT: vpmovb2m %zmm0, %k1
|
||||
; SKX-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1>%mask)
|
||||
call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
|
||||
declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
|
||||
|
||||
define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
|
||||
; SKX-LABEL: test_mask_store_8xi16:
|
||||
|
@ -2340,10 +2340,10 @@ define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %
|
|||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu16 %xmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
|
||||
call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
|
||||
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
|
||||
|
||||
define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
|
||||
; SKX-LABEL: test_mask_store_16xi16:
|
||||
|
@ -2352,10 +2352,10 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1
|
|||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu16 %ymm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
|
||||
call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
|
||||
declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
|
||||
|
||||
define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
|
||||
; SKX-LABEL: test_mask_store_32xi16:
|
||||
|
@ -2364,7 +2364,8 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||
; SKX-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1>%mask)
|
||||
call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)
|
||||
|
||||
declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
; RUN: opt -instcombine -S < %s | FileCheck %s
|
||||
|
||||
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
|
||||
declare void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
|
||||
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
|
||||
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
|
||||
declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
|
||||
declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
|
||||
|
||||
define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
|
||||
ret <2 x double> %res
|
||||
|
||||
; CHECK-LABEL: @load_zeromask(
|
||||
|
@ -14,7 +14,7 @@ define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru)
|
|||
}
|
||||
|
||||
define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
|
||||
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
|
||||
ret <2 x double> %res
|
||||
|
||||
; CHECK-LABEL: @load_onemask(
|
||||
|
@ -23,7 +23,7 @@ define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
|
|||
}
|
||||
|
||||
define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
|
||||
call void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptr, i32 3, <2 x i1> zeroinitializer)
|
||||
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 3, <2 x i1> zeroinitializer)
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @store_zeromask(
|
||||
|
@ -31,7 +31,7 @@ define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
|
|||
}
|
||||
|
||||
define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) {
|
||||
call void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
|
||||
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @store_onemask(
|
||||
|
|
|
@ -53,7 +53,7 @@ define <4 x float> @mload_one_one(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_one_one(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>*
|
||||
; CHECK-NEXT: %1 = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> undef)
|
||||
; CHECK-NEXT: %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> undef)
|
||||
; CHECK-NEXT: ret <4 x float> %1
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,7 @@ define <2 x double> @mload_one_one_double(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_one_one_double(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x double>*
|
||||
; CHECK-NEXT: %1 = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> undef)
|
||||
; CHECK-NEXT: %1 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> undef)
|
||||
; CHECK-NEXT: ret <2 x double> %1
|
||||
}
|
||||
|
||||
|
@ -77,7 +77,7 @@ define <8 x float> @mload_v8f32(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_v8f32(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x float>*
|
||||
; CHECK-NEXT: %1 = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> undef)
|
||||
; CHECK-NEXT: %1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> undef)
|
||||
; CHECK-NEXT: ret <8 x float> %1
|
||||
}
|
||||
|
||||
|
@ -87,7 +87,7 @@ define <4 x double> @mload_v4f64(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_v4f64(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x double>*
|
||||
; CHECK-NEXT: %1 = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> undef)
|
||||
; CHECK-NEXT: %1 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> undef)
|
||||
; CHECK-NEXT: ret <4 x double> %1
|
||||
}
|
||||
|
||||
|
@ -99,7 +99,7 @@ define <4 x i32> @mload_v4i32(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_v4i32(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i32>*
|
||||
; CHECK-NEXT: %1 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> undef)
|
||||
; CHECK-NEXT: %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> undef)
|
||||
; CHECK-NEXT: ret <4 x i32> %1
|
||||
}
|
||||
|
||||
|
@ -109,7 +109,7 @@ define <2 x i64> @mload_v2i64(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_v2i64(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x i64>*
|
||||
; CHECK-NEXT: %1 = call <2 x i64> @llvm.masked.load.v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> undef)
|
||||
; CHECK-NEXT: %1 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> undef)
|
||||
; CHECK-NEXT: ret <2 x i64> %1
|
||||
}
|
||||
|
||||
|
@ -119,7 +119,7 @@ define <8 x i32> @mload_v8i32(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_v8i32(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x i32>*
|
||||
; CHECK-NEXT: %1 = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> undef)
|
||||
; CHECK-NEXT: %1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> undef)
|
||||
; CHECK-NEXT: ret <8 x i32> %1
|
||||
}
|
||||
|
||||
|
@ -129,7 +129,7 @@ define <4 x i64> @mload_v4i64(i8* %f) {
|
|||
|
||||
; CHECK-LABEL: @mload_v4i64(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i64>*
|
||||
; CHECK-NEXT: %1 = call <4 x i64> @llvm.masked.load.v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> undef)
|
||||
; CHECK-NEXT: %1 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> undef)
|
||||
; CHECK-NEXT: ret <4 x i64> %1
|
||||
}
|
||||
|
||||
|
@ -187,7 +187,7 @@ define void @mstore_one_one(i8* %f, <4 x float> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_one_one(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4f32(<4 x float> %v, <4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v, <4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
@ -199,7 +199,7 @@ define void @mstore_one_one_double(i8* %f, <2 x double> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_one_one_double(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x double>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v2f64(<2 x double> %v, <2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %v, <2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
@ -211,7 +211,7 @@ define void @mstore_v8f32(i8* %f, <8 x float> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_v8f32(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x float>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v8f32(<8 x float> %v, <8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %v, <8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
@ -221,7 +221,7 @@ define void @mstore_v4f64(i8* %f, <4 x double> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_v4f64(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x double>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4f64(<4 x double> %v, <4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %v, <4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
@ -233,7 +233,7 @@ define void @mstore_v4i32(i8* %f, <4 x i32> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_v4i32(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4i32(<4 x i32> %v, <4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v, <4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
@ -243,7 +243,7 @@ define void @mstore_v2i64(i8* %f, <2 x i64> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_v2i64(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x i64>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v2i64(<2 x i64> %v, <2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %v, <2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
@ -253,7 +253,7 @@ define void @mstore_v8i32(i8* %f, <8 x i32> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_v8i32(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v8i32(<8 x i32> %v, <8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %v, <8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
@ -263,7 +263,7 @@ define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
|
|||
|
||||
; CHECK-LABEL: @mstore_v4i64(
|
||||
; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i64>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4i64(<4 x i64> %v, <4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %v, <4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -18,16 +18,16 @@ target triple = "x86_64-pc_linux"
|
|||
|
||||
;AVX-LABEL: @foo1
|
||||
;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
|
||||
;AVX: call <8 x i32> @llvm.masked.load.v8i32
|
||||
;AVX: call <8 x i32> @llvm.masked.load.v8i32.p0v8i32
|
||||
;AVX: add nsw <8 x i32>
|
||||
;AVX: call void @llvm.masked.store.v8i32
|
||||
;AVX: call void @llvm.masked.store.v8i32.p0v8i32
|
||||
;AVX: ret void
|
||||
|
||||
;AVX512-LABEL: @foo1
|
||||
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
|
||||
;AVX512: call <16 x i32> @llvm.masked.load.v16i32
|
||||
;AVX512: call <16 x i32> @llvm.masked.load.v16i32.p0v16i32
|
||||
;AVX512: add nsw <16 x i32>
|
||||
;AVX512: call void @llvm.masked.store.v16i32
|
||||
;AVX512: call void @llvm.masked.store.v16i32.p0v16i32
|
||||
;AVX512: ret void
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
|
@ -89,6 +89,81 @@ for.end: ; preds = %for.cond
|
|||
ret void
|
||||
}
|
||||
|
||||
; The same as @foo1 but all the pointers are address space 1 pointers.
|
||||
|
||||
;AVX-LABEL: @foo1_addrspace1
|
||||
;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
|
||||
;AVX: call <8 x i32> @llvm.masked.load.v8i32.p1v8i32
|
||||
;AVX: add nsw <8 x i32>
|
||||
;AVX: call void @llvm.masked.store.v8i32.p1v8i32
|
||||
;AVX: ret void
|
||||
|
||||
;AVX512-LABEL: @foo1_addrspace1
|
||||
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
|
||||
;AVX512: call <16 x i32> @llvm.masked.load.v16i32.p1v16i32
|
||||
;AVX512: add nsw <16 x i32>
|
||||
;AVX512: call void @llvm.masked.store.v16i32.p1v16i32
|
||||
;AVX512: ret void
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
; Unoptimized scalar loop, roughly:
;   for (i = 0; i < 10000; i++)
;     if (trigger[i] < 100)
;       A[i] = B[i] + trigger[i];
; %A, %B and %trigger are all address space 1 pointers. The conditional load
; of B[i] and the conditional store to A[i] are the accesses the expectations
; written for this function want turned into masked load/store calls carrying
; a p1 pointer-type suffix.
define void @foo1_addrspace1(i32 addrspace(1)* %A, i32 addrspace(1)* %B, i32 addrspace(1)* %trigger) {
|
||||
entry:
|
||||
; Stack slots for the three pointer arguments and the induction variable.
; The slots themselves are ordinary allocas; only the pointers stored in
; them refer to address space 1.
%A.addr = alloca i32 addrspace(1)*, align 8
|
||||
%B.addr = alloca i32 addrspace(1)*, align 8
|
||||
%trigger.addr = alloca i32 addrspace(1)*, align 8
|
||||
%i = alloca i32, align 4
|
||||
store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 8
|
||||
store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 8
|
||||
store i32 addrspace(1)* %trigger, i32 addrspace(1)** %trigger.addr, align 8
|
||||
; i = 0
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
; Loop header: continue while i < 10000 (signed compare).
%0 = load i32, i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 10000
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
; Load trigger[i] from address space 1 and test it against 100.
%1 = load i32, i32* %i, align 4
|
||||
%idxprom = sext i32 %1 to i64
|
||||
%2 = load i32 addrspace(1)*, i32 addrspace(1)** %trigger.addr, align 8
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 %idxprom
|
||||
%3 = load i32, i32 addrspace(1)* %arrayidx, align 4
|
||||
%cmp1 = icmp slt i32 %3, 100
|
||||
br i1 %cmp1, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
; Guarded path: %6 = B[i] (conditional load from address space 1).
%4 = load i32, i32* %i, align 4
|
||||
%idxprom2 = sext i32 %4 to i64
|
||||
%5 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 8
|
||||
%arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %5, i64 %idxprom2
|
||||
%6 = load i32, i32 addrspace(1)* %arrayidx3, align 4
|
||||
; %9 = trigger[i], reloaded inside the guarded block.
%7 = load i32, i32* %i, align 4
|
||||
%idxprom4 = sext i32 %7 to i64
|
||||
%8 = load i32 addrspace(1)*, i32 addrspace(1)** %trigger.addr, align 8
|
||||
%arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %8, i64 %idxprom4
|
||||
%9 = load i32, i32 addrspace(1)* %arrayidx5, align 4
|
||||
%add = add nsw i32 %6, %9
|
||||
; A[i] = B[i] + trigger[i] (conditional store to address space 1).
%10 = load i32, i32* %i, align 4
|
||||
%idxprom6 = sext i32 %10 to i64
|
||||
%11 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 8
|
||||
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %11, i64 %idxprom6
|
||||
store i32 %add, i32 addrspace(1)* %arrayidx7, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %for.body
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.end
|
||||
; i = i + 1, then back to the loop header.
%12 = load i32, i32* %i, align 4
|
||||
%inc = add nsw i32 %12, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
; The source code:
|
||||
;
|
||||
;void foo2(float *A, float *B, int *trigger) {
|
||||
|
@ -102,16 +177,16 @@ for.end: ; preds = %for.cond
|
|||
|
||||
;AVX-LABEL: @foo2
|
||||
;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
|
||||
;AVX: call <8 x float> @llvm.masked.load.v8f32
|
||||
;AVX: call <8 x float> @llvm.masked.load.v8f32.p0v8f32
|
||||
;AVX: fadd <8 x float>
|
||||
;AVX: call void @llvm.masked.store.v8f32
|
||||
;AVX: call void @llvm.masked.store.v8f32.p0v8f32
|
||||
;AVX: ret void
|
||||
|
||||
;AVX512-LABEL: @foo2
|
||||
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
|
||||
;AVX512: call <16 x float> @llvm.masked.load.v16f32
|
||||
;AVX512: call <16 x float> @llvm.masked.load.v16f32.p0v16f32
|
||||
;AVX512: fadd <16 x float>
|
||||
;AVX512: call void @llvm.masked.store.v16f32
|
||||
;AVX512: call void @llvm.masked.store.v16f32.p0v16f32
|
||||
;AVX512: ret void
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
|
@ -187,18 +262,18 @@ for.end: ; preds = %for.cond
|
|||
|
||||
;AVX-LABEL: @foo3
|
||||
;AVX: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
|
||||
;AVX: call <4 x double> @llvm.masked.load.v4f64
|
||||
;AVX: call <4 x double> @llvm.masked.load.v4f64.p0v4f64
|
||||
;AVX: sitofp <4 x i32> %wide.load to <4 x double>
|
||||
;AVX: fadd <4 x double>
|
||||
;AVX: call void @llvm.masked.store.v4f64
|
||||
;AVX: call void @llvm.masked.store.v4f64.p0v4f64
|
||||
;AVX: ret void
|
||||
|
||||
;AVX512-LABEL: @foo3
|
||||
;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
|
||||
;AVX512: call <8 x double> @llvm.masked.load.v8f64
|
||||
;AVX512: call <8 x double> @llvm.masked.load.v8f64.p0v8f64
|
||||
;AVX512: sitofp <8 x i32> %wide.load to <8 x double>
|
||||
;AVX512: fadd <8 x double>
|
||||
;AVX512: call void @llvm.masked.store.v8f64
|
||||
;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
|
||||
;AVX512: ret void
|
||||
|
||||
|
||||
|
@ -429,17 +504,17 @@ for.end: ; preds = %for.cond
|
|||
;AVX2-LABEL: @foo6
|
||||
;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer
|
||||
;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
;AVX2: call <4 x double> @llvm.masked.load.v4f64
|
||||
;AVX2: call <4 x double> @llvm.masked.load.v4f64.p0v4f64
|
||||
;AVX2: fadd <4 x double>
|
||||
;AVX2: call void @llvm.masked.store.v4f64
|
||||
;AVX2: call void @llvm.masked.store.v4f64.p0v4f64
|
||||
;AVX2: ret void
|
||||
|
||||
;AVX512-LABEL: @foo6
|
||||
;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer
|
||||
;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4
|
||||
;AVX512: call <8 x double> @llvm.masked.load.v8f64
|
||||
;AVX512: call <8 x double> @llvm.masked.load.v8f64.p0v8f64
|
||||
;AVX512: fadd <8 x double>
|
||||
;AVX512: call void @llvm.masked.store.v8f64
|
||||
;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
|
||||
;AVX512: ret void
|
||||
|
||||
|
||||
|
@ -507,8 +582,8 @@ for.end: ; preds = %for.cond
|
|||
; }
|
||||
|
||||
;AVX512-LABEL: @foo7
|
||||
;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
|
||||
;AVX512: call void @llvm.masked.store.v8f64
|
||||
;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>*
|
||||
;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
|
||||
;AVX512: ret void
|
||||
|
||||
define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 {
|
||||
|
@ -579,8 +654,8 @@ for.end: ; preds = %for.cond
|
|||
;}
|
||||
|
||||
;AVX512-LABEL: @foo8
|
||||
;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
|
||||
;AVX512: call void @llvm.masked.store.v8f64
|
||||
;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* %
|
||||
;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
|
||||
;AVX512: ret void
|
||||
|
||||
define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 {
|
||||
|
|
Loading…
Reference in New Issue